[llvm] r332357 - [X86] Split off F16C WriteCvtPH2PS/WriteCvtPS2PH scheduler classes

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue May 15 07:12:32 PDT 2018


Author: rksimon
Date: Tue May 15 07:12:32 2018
New Revision: 332357

URL: http://llvm.org/viewvc/llvm-project?rev=332357&view=rev
Log:
[X86] Split off F16C WriteCvtPH2PS/WriteCvtPS2PH scheduler classes

Btver2 - VCVTPH2PSYrm needs to double-pump the AGU
Broadwell - the VCVTPS2PH*mr stores were missing their extra latency

This also allows us to remove the WriteCvtF2FSt conversion-store scheduler class.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
    llvm/trunk/lib/Target/X86/X86SchedHaswell.td
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/trunk/lib/Target/X86/X86Schedule.td
    llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
    llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
    llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
    llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue May 15 07:12:32 2018
@@ -7863,16 +7863,16 @@ multiclass avx512_cvtph2ps_sae<X86Vector
 
 let Predicates = [HasAVX512] in
   defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
-                                    WriteCvtF2F>,
+                                    WriteCvtPH2PSY>,
                     avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtF2F>,
                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
 
 let Predicates = [HasVLX] in {
   defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
-                       loadv2i64, WriteCvtF2F>, EVEX, EVEX_V256,
+                       loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
                        EVEX_CD8<32, CD8VH>;
   defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
-                       loadv2i64, WriteCvtF2F>, EVEX, EVEX_V128,
+                       loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
                        EVEX_CD8<32, CD8VH>;
 
   // Pattern match vcvtph2ps of a scalar i64 load.
@@ -7886,42 +7886,46 @@ let Predicates = [HasVLX] in {
 }
 
 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
-                           X86MemOperand x86memop> {
+                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
   defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
                    (ins _src.RC:$src1, i32u8imm:$src2),
                    "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                    (X86cvtps2ph (_src.VT _src.RC:$src1),
                                 (i32 imm:$src2)), 0, 0>,
-                   AVX512AIi8Base, Sched<[WriteCvtF2F]>;
+                   AVX512AIi8Base, Sched<[RR]>;
   let hasSideEffects = 0, mayStore = 1 in {
     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-               Sched<[WriteCvtF2FSt]>;
+               Sched<[MR]>;
     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
-                EVEX_K, Sched<[WriteCvtF2FSt]>;
+                EVEX_K, Sched<[MR]>;
   }
 }
 
-multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
+multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
+                               SchedWrite Sched> {
   let hasSideEffects = 0 in
   defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                    (outs _dest.RC:$dst),
                    (ins _src.RC:$src1, i32u8imm:$src2),
                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
-                   EVEX_B, AVX512AIi8Base, Sched<[WriteCvtF2F]>;
+                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
 }
 
 let Predicates = [HasAVX512] in {
-  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
-                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
+  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
+                                    WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
+                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PH>,
                                         EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
   let Predicates = [HasVLX] in {
-    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
+    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
+                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                          EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
-    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
+    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
+                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                          EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
   }
 

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue May 15 07:12:32 2018
@@ -7303,37 +7303,41 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4
 // Half precision conversion instructions
 //
 
-multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> {
+multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
+                      X86FoldableSchedWrite sched> {
   def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
              "vcvtph2ps\t{$src, $dst|$dst, $src}",
              [(set RC:$dst, (X86cvtph2ps VR128:$src))]>,
-             T8PD, VEX, Sched<[WriteCvtF2F]>;
+             T8PD, VEX, Sched<[sched]>;
   let hasSideEffects = 0, mayLoad = 1 in
   def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              "vcvtph2ps\t{$src, $dst|$dst, $src}",
              [(set RC:$dst, (X86cvtph2ps (bc_v8i16
                                           (loadv2i64 addr:$src))))]>,
-             T8PD, VEX, Sched<[WriteCvtF2FLd]>;
+             T8PD, VEX, Sched<[sched.Folded]>;
 }
 
-multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop> {
+multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
+                      SchedWrite RR, SchedWrite MR> {
   def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
                (ins RC:$src1, i32u8imm:$src2),
                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
-               TAPD, VEX, Sched<[WriteCvtF2F]>;
+               TAPD, VEX, Sched<[RR]>;
   let hasSideEffects = 0, mayStore = 1 in
   def mr : Ii8<0x1D, MRMDestMem, (outs),
                (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-               TAPD, VEX, Sched<[WriteCvtF2FSt]>;
+               TAPD, VEX, Sched<[MR]>;
 }
 
 let Predicates = [HasF16C, NoVLX] in {
-  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem>;
-  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L;
-  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem>;
-  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem>, VEX_L;
+  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>;
+  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L;
+  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
+                               WriteCvtPS2PHSt>;
+  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
+                               WriteCvtPS2PHYSt>, VEX_L;
 
   // Pattern match vcvtph2ps of a scalar i64 load.
   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),

Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Tue May 15 07:12:32 2018
@@ -257,12 +257,6 @@ defm : BWWriteResPair<WriteFBlendY, [BWP
 defm : BWWriteResPair<WriteFVarBlend,  [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
 defm : BWWriteResPair<WriteFVarBlendY, [BWPort5], 2, [2], 2, 6>; // Fp vector variable blends.
 
-def  : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
-  let Latency = 4;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-
 // FMA Scheduling helper class.
 // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
 
@@ -353,6 +347,16 @@ defm : BWWriteResPair<WriteCvtF2I, [BWPo
 defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
 defm : BWWriteResPair<WriteCvtF2F, [BWPort1], 3>; // Float -> Float size conversion.
 
+defm : X86WriteRes<WriteCvtPH2PS,     [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY,    [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd,  [BWPort0,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH,    [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY,   [BWPort1,BWPort5], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt,  [BWPort1,BWPort4,BWPort237], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [BWPort1,BWPort4,BWPort237], 7, [1,1,1], 3>;
+
 // Strings instructions.
 
 // Packed Compare Implicit Length Strings, Return Mask
@@ -625,8 +629,7 @@ def BWWriteResGroup15 : SchedWriteRes<[B
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PS(Y?)rr",
-                                            "(V?)CVTPS2PDrr",
+def: InstRW<[BWWriteResGroup15], (instregex "(V?)CVTPS2PDrr",
                                             "(V?)CVTSS2SDrr")>;
 
 def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> {
@@ -825,7 +828,6 @@ def: InstRW<[BWWriteResGroup42], (instre
                                             "MMX_CVT(T?)PS2PIirr",
                                             "(V?)CVTDQ2PDrr",
                                             "(V?)CVTPD2PSrr",
-                                            "VCVTPS2PHrr",
                                             "(V?)CVTSD2SSrr",
                                             "(V?)CVTSI642SDrr",
                                             "(V?)CVTSI2SDrr",
@@ -963,8 +965,7 @@ def BWWriteResGroup59 : SchedWriteRes<[B
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[BWWriteResGroup59], (instregex "VCVTPH2PS(Y?)rm",
-                                            "(V?)CVTPS2PDrm",
+def: InstRW<[BWWriteResGroup59], (instregex "(V?)CVTPS2PDrm",
                                             "(V?)CVTSS2SDrm",
                                             "VPSLLVQrm",
                                             "VPSRLVQrm")>;
@@ -976,7 +977,6 @@ def BWWriteResGroup60 : SchedWriteRes<[B
 }
 def: InstRW<[BWWriteResGroup60], (instregex "VCVTDQ2PDYrr",
                                             "VCVTPD2PSYrr",
-                                            "VCVTPS2PHYrr",
                                             "VCVT(T?)PD2DQYrr")>;
 
 def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {

Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Tue May 15 07:12:32 2018
@@ -251,11 +251,15 @@ defm : HWWriteResPair<WriteFVarShuffle25
 defm : HWWriteResPair<WriteFVarBlend,  [HWPort5], 2, [2], 2, 6>;
 defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
 
-def  : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> {
-  let Latency = 5;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
-}
+defm : X86WriteRes<WriteCvtPH2PS,     [HWPort0,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY,    [HWPort0,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd,  [HWPort0,HWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort0,HWPort23], 7, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH,    [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY,   [HWPort1,HWPort5], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt,  [HWPort1,HWPort4,HWPort5,HWPort237], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [HWPort1,HWPort4,HWPort5,HWPort237], 7, [1,1,1,1], 4>;
 
 // Vector integer operations.
 defm : X86WriteRes<WriteVecLoad,         [HWPort23], 5, [1], 1>;
@@ -868,16 +872,14 @@ def HWWriteResGroup11 : SchedWriteRes<[H
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[HWWriteResGroup11], (instregex "VCVTPH2PSrm",
-                                            "(V?)CVTPS2PDrm")>;
+def: InstRW<[HWWriteResGroup11], (instregex "(V?)CVTPS2PDrm")>;
 
 def HWWriteResGroup11_1 : SchedWriteRes<[HWPort0,HWPort23]> {
   let Latency = 7;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[HWWriteResGroup11_1], (instregex "VCVTPH2PSYrm",
-                                              "(V?)CVTSS2SDrm",
+def: InstRW<[HWWriteResGroup11_1], (instregex "(V?)CVTSS2SDrm",
                                               "VPSLLVQrm",
                                               "VPSRLVQrm")>;
 
@@ -1076,9 +1078,7 @@ def HWWriteResGroup31 : SchedWriteRes<[H
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[HWWriteResGroup31], (instregex "VCVTPH2PSYrr",
-                                            "VCVTPH2PSrr",
-                                            "(V?)CVTPS2PDrr",
+def: InstRW<[HWWriteResGroup31], (instregex "(V?)CVTPS2PDrr",
                                             "(V?)CVTSS2SDrr")>;
 
 def HWWriteResGroup32 : SchedWriteRes<[HWPort6,HWPort0156]> {
@@ -1397,7 +1397,6 @@ def: InstRW<[HWWriteResGroup73], (instre
                                             "MMX_CVT(T?)PS2PIirr",
                                             "(V?)CVTDQ2PDrr",
                                             "(V?)CVTPD2PSrr",
-                                            "VCVTPS2PHrr",
                                             "(V?)CVTSD2SSrr",
                                             "(V?)CVTSI(64)?2SDrr",
                                             "(V?)CVTSI2SSrr",
@@ -1604,7 +1603,6 @@ def HWWriteResGroup102 : SchedWriteRes<[
 }
 def: InstRW<[HWWriteResGroup102], (instregex "VCVTDQ2PDYrr",
                                              "VCVTPD2PSYrr",
-                                             "VCVTPS2PHYrr",
                                              "VCVT(T?)PD2DQYrr")>;
 
 def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
@@ -1629,13 +1627,6 @@ def HWWriteResGroup105 : SchedWriteRes<[
 def: InstRW<[HWWriteResGroup105], (instregex "SHLD(16|32|64)rrCL",
                                              "SHRD(16|32|64)rrCL")>;
 
-def HWWriteResGroup106 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort237]> {
-  let Latency = 7;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup106], (instregex "VCVTPS2PHYmr")>;
-
 def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> {
   let Latency = 6;
   let NumMicroOps = 4;

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Tue May 15 07:12:32 2018
@@ -235,7 +235,14 @@ defm : SBWriteResPair<WriteFBlend,    [S
 defm : SBWriteResPair<WriteFBlendY,   [SBPort05], 1, [1], 1, 7>;
 defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
 defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
-def  : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; }
+
+defm : SBWriteResPair<WriteCvtPH2PS,   [SBPort1], 3>;
+defm : SBWriteResPair<WriteCvtPH2PSY,  [SBPort1], 3>;
+
+defm : X86WriteRes<WriteCvtPS2PH,    [SBPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHY,   [SBPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHSt,  [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
 
 // Vector integer operations.
 defm : X86WriteRes<WriteVecLoad,         [SBPort23], 5, [1], 1>;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Tue May 15 07:12:32 2018
@@ -249,12 +249,6 @@ defm : SKLWriteResPair<WriteFBlendY, [SK
 defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
 defm : SKLWriteResPair<WriteFVarBlendY,[SKLPort015], 2, [2], 2, 7>; // Fp vector variable blends.
 
-def  : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
-  let Latency = 6;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
-}
-
 // FMA Scheduling helper class.
 // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
 
@@ -346,6 +340,16 @@ defm : SKLWriteResPair<WriteCvtF2I, [SKL
 defm : SKLWriteResPair<WriteCvtI2F, [SKLPort1], 4>; // Integer -> Float.
 defm : SKLWriteResPair<WriteCvtF2F, [SKLPort1], 3>; // Float -> Float size conversion.
 
+defm : X86WriteRes<WriteCvtPH2PS,    [SKLPort5,SKLPort015],  5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY,    [SKLPort5,SKLPort01],  7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd,  [SKLPort23,SKLPort01],  9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH,                       [SKLPort5,SKLPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY,                       [SKLPort5,SKLPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt,  [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 6, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 8, [1,1,1,1], 4>;
+
 // Strings instructions.
 
 // Packed Compare Implicit Length Strings, Return Mask
@@ -930,9 +934,7 @@ def: InstRW<[SKLWriteResGroup60], (instr
                                              "MMX_CVT(T?)PS2PIirr",
                                              "(V?)CVT(T?)PD2DQrr",
                                              "(V?)CVTPD2PSrr",
-                                             "VCVTPH2PSrr",
                                              "(V?)CVTPS2PDrr",
-                                             "VCVTPS2PHrr",
                                              "(V?)CVTSD2SSrr",
                                              "(V?)CVTSI642SDrr",
                                              "(V?)CVTSI2SDrr",
@@ -1157,9 +1159,7 @@ def SKLWriteResGroup89 : SchedWriteRes<[
   let ResourceCycles = [1,1];
 }
 def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPD2PSYrr",
-                                             "VCVTPH2PSYrr",
                                              "VCVTPS2PDYrr",
-                                             "VCVTPS2PHYrr",
                                              "VCVT(T?)PD2DQYrr")>;
 
 def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
@@ -1300,13 +1300,6 @@ def SKLWriteResGroup112 : SchedWriteRes<
 }
 def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PH(ADD|SUB)SWrm")>;
 
-def SKLWriteResGroup114 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
-  let Latency = 8;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup114], (instregex "VCVTPS2PHYmr")>;
-
 def SKLWriteResGroup115 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06]> {
   let Latency = 8;
   let NumMicroOps = 5;
@@ -1369,7 +1362,6 @@ def SKLWriteResGroup123 : SchedWriteRes<
   let ResourceCycles = [1,1];
 }
 def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVT(T?)PS2PIirm",
-                                              "VCVTPH2PSrm",
                                               "(V?)CVTPS2PDrm")>;
 
 def SKLWriteResGroup127 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> {
@@ -1418,7 +1410,6 @@ def SKLWriteResGroup134 : SchedWriteRes<
   let ResourceCycles = [1,1];
 }
 def: InstRW<[SKLWriteResGroup134], (instregex "(V?)CVTDQ2PSrm",
-                                              "(V?)CVTPH2PSYrm",
                                               "(V?)CVTPS2DQrm",
                                               "(V?)CVTSS2SDrm",
                                               "(V?)CVTTPS2DQrm")>;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Tue May 15 07:12:32 2018
@@ -249,12 +249,6 @@ defm : SKXWriteResPair<WriteFBlendY,[SKX
 defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
 defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>; // Fp vector variable blends.
 
-def  : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
-  let Latency = 6;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
-}
-
 // FMA Scheduling helper class.
 // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
 
@@ -346,6 +340,16 @@ defm : SKXWriteResPair<WriteCvtF2I, [SKX
 defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>; // Integer -> Float.
 defm : SKXWriteResPair<WriteCvtF2F, [SKXPort1], 3>; // Float -> Float size conversion.
 
+defm : X86WriteRes<WriteCvtPH2PS,     [SKXPort5,SKXPort015],  5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY,    [SKXPort5,SKXPort015],  7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd,  [SKXPort23,SKXPort015],  9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort015], 10, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH,                        [SKXPort5,SKXPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY,                       [SKXPort5,SKXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt,  [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 6, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 8, [1,1,1,1], 4>;
+
 // Strings instructions.
 
 // Packed Compare Implicit Length Strings, Return Mask
@@ -1050,12 +1054,8 @@ def: InstRW<[SKXWriteResGroup61], (instr
                                              "VCVTPD2PSZ128rr",
                                              "(V?)CVTPD2PSrr",
                                              "VCVTPD2UDQZ128rr",
-                                             "VCVTPH2PSZ128rr",
-                                             "VCVTPH2PSrr",
                                              "VCVTPS2PDZ128rr",
                                              "(V?)CVTPS2PDrr",
-                                             "VCVTPS2PHZ128rr",
-                                             "VCVTPS2PHrr",
                                              "VCVTPS2QQZ128rr",
                                              "VCVTPS2UQQZ128rr",
                                              "VCVTQQ2PSZ128rr",
@@ -1370,9 +1370,7 @@ def: InstRW<[SKXWriteResGroup93], (instr
                                              "VCVTPD2DQ(Y|Z|Z256)rr",
                                              "VCVTPD2PS(Y|Z|Z256)rr",
                                              "VCVTPD2UDQ(Z|Z256)rr",
-                                             "VCVTPH2PS(Y|Z|Z256)rr",
                                              "VCVTPS2PD(Y|Z|Z256)rr",
-                                             "VCVTPS2PH(Y|Z|Z256)rr",
                                              "VCVTPS2QQ(Z|Z256)rr",
                                              "VCVTPS2UQQ(Z|Z256)rr",
                                              "VCVTQQ2PS(Z|Z256)rr",
@@ -1668,13 +1666,6 @@ def SKXWriteResGroup123 : SchedWriteRes<
 }
 def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
 
-def SKXWriteResGroup125 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
-  let Latency = 8;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup125], (instregex "VCVTPS2PHYmr")>;
-
 def SKXWriteResGroup126 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06]> {
   let Latency = 8;
   let NumMicroOps = 5;
@@ -1816,7 +1807,6 @@ def SKXWriteResGroup137 : SchedWriteRes<
   let ResourceCycles = [1,1];
 }
 def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
-                                              "VCVTPH2PSrm",
                                               "(V?)CVTPS2PDrm")>;
 
 def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> {
@@ -1905,7 +1895,6 @@ def: InstRW<[SKXWriteResGroup149], (inst
                                               "(V?)CVTDQ2PSrm",
                                               "VCVTPD2QQZ128rm(b?)",
                                               "VCVTPD2UQQZ128rm(b?)",
-                                              "VCVTPH2PSYrm",
                                               "VCVTPH2PSZ128rm(b?)",
                                               "VCVTPS2DQZ128rm(b?)",
                                               "(V?)CVTPS2DQrm",

Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Tue May 15 07:12:32 2018
@@ -299,7 +299,14 @@ def WriteMMXMOVMSK  : SchedWrite;
 defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
 defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
 defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
-def  WriteCvtF2FSt : SchedWrite; // // Float -> Float + store size conversion.
+
+defm WriteCvtPH2PS    : X86SchedWritePair; // Half -> Float size conversion.
+defm WriteCvtPH2PSY   : X86SchedWritePair; // Half -> Float size conversion (YMM/ZMM).
+
+def  WriteCvtPS2PH    : SchedWrite; // // Float -> Half size conversion.
+def  WriteCvtPS2PHY   : SchedWrite; // // Float -> Half size conversion (YMM/ZMM).
+def  WriteCvtPS2PHSt  : SchedWrite; // // Float -> Half + store size conversion.
+def  WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion (YMM/ZMM).
 
 // CRC32 instruction.
 defm WriteCRC32 : X86SchedWritePair;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Tue May 15 07:12:32 2018
@@ -276,7 +276,13 @@ defm : AtomWriteResPair<WriteFVarShuffle
 defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
 defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
 defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
-def  : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+
+defm : AtomWriteResPair<WriteCvtPH2PS,  [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteCvtPH2PSY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def  : WriteRes<WriteCvtPS2PH,          [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def  : WriteRes<WriteCvtPS2PHY,         [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def  : WriteRes<WriteCvtPS2PHSt,        [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def  : WriteRes<WriteCvtPS2PHYSt,       [AtomPort0]>; // NOTE: Doesn't exist on Atom.
 
 ////////////////////////////////////////////////////////////////////////////////
 // Vector integer operations.

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Tue May 15 07:12:32 2018
@@ -363,7 +363,14 @@ defm : JWriteResFpuPair<WriteFVarShuffle
 defm : JWriteResFpuPair<WriteCvtF2I,       [JFPU1, JSTC], 3>; // Float -> Integer.
 defm : JWriteResFpuPair<WriteCvtI2F,       [JFPU1, JSTC], 3>; // Integer -> Float.
 defm : JWriteResFpuPair<WriteCvtF2F,       [JFPU1, JSTC], 3>; // Float -> Float size conversion.
-def  : WriteRes<WriteCvtF2FSt, [JFPU1, JSTC, JSAGU]> { let Latency = 4; }
+
+defm : JWriteResFpuPair<WriteCvtPH2PS,     [JFPU1, JSTC], 3, [1,1], 1>;
+defm : JWriteResYMMPair<WriteCvtPH2PSY,    [JFPU1, JSTC], 3, [2,2], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH,                 [JFPU1, JSTC], 3, [1,1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHY,          [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
+defm : X86WriteRes<WriteCvtPS2PHSt,        [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
 
 def JWriteCVTF2F : SchedWriteRes<[JFPU1, JSTC]> {
   let Latency = 7;
@@ -529,38 +536,6 @@ def JWriteINSERTQ: SchedWriteRes<[JFPU01
 def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
 
 ////////////////////////////////////////////////////////////////////////////////
-// F16C instructions.
-////////////////////////////////////////////////////////////////////////////////
-
-def JWriteCVTPS2PHY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
-  let Latency = 6;
-  let ResourceCycles = [2, 2, 2];
-  let NumMicroOps = 3;
-}
-def : InstRW<[JWriteCVTPS2PHY], (instrs VCVTPS2PHYrr)>;
-
-def JWriteCVTPS2PHYSt: SchedWriteRes<[JFPU1, JSTC, JFPX, JSAGU]> {
-  let Latency = 7;
-  let ResourceCycles = [2, 2, 2, 1];
-  let NumMicroOps = 3;
-}
-def : InstRW<[JWriteCVTPS2PHYSt], (instrs VCVTPS2PHYmr)>;
-
-def JWriteCVTPH2PSY: SchedWriteRes<[JFPU1, JSTC]> {
-  let Latency = 3;
-  let ResourceCycles = [2, 2];
-  let NumMicroOps = 2;
-}
-def : InstRW<[JWriteCVTPH2PSY], (instrs VCVTPH2PSYrr)>;
-
-def JWriteCVTPH2PSYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
-  let Latency = 8;
-  let ResourceCycles = [1, 2, 2];
-  let NumMicroOps = 2;
-}
-def : InstRW<[JWriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>;
-
-////////////////////////////////////////////////////////////////////////////////
 // AVX instructions.
 ////////////////////////////////////////////////////////////////////////////////
 

Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Tue May 15 07:12:32 2018
@@ -212,7 +212,6 @@ defm : SLMWriteResPair<WriteFShuffleY, [
 defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteFBlend,  [SLM_FPC_RSV0],  1>;
-def  : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
 
 // Vector integer operations.
 def  : WriteRes<WriteVecLoad,         [SLM_MEC_RSV]> { let Latency = 3; }
@@ -397,4 +396,11 @@ defm : SLMWriteResPair<WriteFMA, [SLM_FP
 defm : SLMWriteResPair<WriteFMAX, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteFMAY, [SLM_FPC_RSV0],  1>;
 
+defm : SLMWriteResPair<WriteCvtPH2PS,  [SLM_FPC_RSV0],  1>;
+defm : SLMWriteResPair<WriteCvtPH2PSY, [SLM_FPC_RSV0],  1>;
+def  : WriteRes<WriteCvtPS2PH,    [SLM_FPC_RSV0]>;
+def  : WriteRes<WriteCvtPS2PHY,   [SLM_FPC_RSV0]>;
+def  : WriteRes<WriteCvtPS2PHSt,  [SLM_FPC_RSV0]>;
+def  : WriteRes<WriteCvtPS2PHYSt, [SLM_FPC_RSV0]>;
+
 } // SchedModel

Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Tue May 15 07:12:32 2018
@@ -272,7 +272,6 @@ defm : ZnWriteResFpuPair<WriteFSqrt64X,
 defm : ZnWriteResFpuPair<WriteFSqrt64Y,  [ZnFPU3], 40, [40], 1, 7, 1>;
 defm : ZnWriteResFpuPair<WriteFSqrt64Z,  [ZnFPU3], 40, [40], 1, 7, 1>;
 defm : ZnWriteResFpuPair<WriteFSqrt80,   [ZnFPU3], 20, [20]>;
-def  : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
 
 // Vector integer operations which uses FPU units
 defm : X86WriteRes<WriteVecLoad,         [ZnAGU], 8, [1], 1>;
@@ -1326,18 +1325,21 @@ def : InstRW<[ZnWriteCVSTSI2SIr], (instr
 // r32,m32.
 def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
 
-
 // VCVTPS2PH.
 // x,v,i.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)rr")>;
+def : SchedAlias<WriteCvtPS2PH,    ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPS2PHY,   ZnWriteMicrocoded>;
 // m,v,i.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)mr")>;
+def : SchedAlias<WriteCvtPS2PHSt,  ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>;
 
 // VCVTPH2PS.
 // v,x.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rr")>;
+def : SchedAlias<WriteCvtPH2PS,    ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPH2PSY,   ZnWriteMicrocoded>;
 // v,m.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rm")>;
+def : SchedAlias<WriteCvtPH2PSLd,  ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>;
 
 //-- SSE4A instructions --//
 // EXTRQ

Modified: llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/f16c-schedule.ll?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/f16c-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/f16c-schedule.ll Tue May 15 07:12:32 2018
@@ -143,7 +143,7 @@ define <8 x i16> @test_vcvtps2ph_128(<4
 ; BROADWELL-LABEL: test_vcvtps2ph_128:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
+; BROADWELL-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_vcvtps2ph_128:
@@ -196,7 +196,7 @@ define <8 x i16> @test_vcvtps2ph_256(<8
 ; BROADWELL-LABEL: test_vcvtps2ph_256:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
+; BROADWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;

Modified: llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s Tue May 15 07:12:32 2018
@@ -27,9 +27,9 @@ vcvtps2ph   $0, %ymm0, (%rax)
 # CHECK-NEXT:  2      2     1.00                    	vcvtph2ps	%xmm0, %ymm2
 # CHECK-NEXT:  2      6     1.00    *               	vcvtph2ps	(%rax), %ymm2
 # CHECK-NEXT:  2      4     1.00                    	vcvtps2ph	$0, %xmm0, %xmm2
-# CHECK-NEXT:  3      4     1.00           *        	vcvtps2ph	$0, %xmm0, (%rax)
+# CHECK-NEXT:  3      5     1.00           *        	vcvtps2ph	$0, %xmm0, (%rax)
 # CHECK-NEXT:  2      6     1.00                    	vcvtps2ph	$0, %ymm0, %xmm2
-# CHECK-NEXT:  3      4     1.00           *        	vcvtps2ph	$0, %ymm0, (%rax)
+# CHECK-NEXT:  3      7     1.00           *        	vcvtps2ph	$0, %ymm0, (%rax)
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0] - BWDivider

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s Tue May 15 07:12:32 2018
@@ -49,14 +49,14 @@ vcvtps2ph   $0, %ymm0, (%rax)
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
-# CHECK-NEXT:  -      -      -     2.00   2.00    -     12.00  2.00    -     2.00   12.00   -      -      -
+# CHECK-NEXT:  -      -      -     2.00   2.00    -     12.00  3.00    -     2.00   12.00   -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   	Instructions:
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -     1.00    -      -      -     	vcvtph2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -     1.00   1.00    -      -     1.00    -      -      -     	vcvtph2ps	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -     2.00    -      -      -     2.00    -      -      -     	vcvtph2ps	%xmm0, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -     2.00   1.00    -      -     2.00    -      -      -     	vcvtph2ps	(%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -     2.00   2.00    -      -     2.00    -      -      -     	vcvtph2ps	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -     1.00    -      -      -     	vcvtps2ph	$0, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     	vcvtps2ph	$0, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -     1.00   1.00    -     2.00    -      -      -     2.00    -      -      -     	vcvtps2ph	$0, %ymm0, %xmm2




More information about the llvm-commits mailing list