[llvm] r330737 - [X86][F16C] Add WriteCvtF2FSt scheduling class

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 24 09:43:07 PDT 2018


Author: rksimon
Date: Tue Apr 24 09:43:07 2018
New Revision: 330737

URL: http://llvm.org/viewvc/llvm-project?rev=330737&view=rev
Log:
[X86][F16C] Add WriteCvtF2FSt scheduling class

Fixes the classification of VCVTPS2PHmr/VCVTPS2PHYmr, which were incorrectly tagged as WriteCvtF2FLd_WriteRMW (PR36887).

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
    llvm/trunk/lib/Target/X86/X86SchedHaswell.td
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/trunk/lib/Target/X86/X86Schedule.td
    llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
    llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
    llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
    llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Apr 24 09:43:07 2018
@@ -7645,47 +7645,43 @@ let Predicates = [HasVLX] in {
 }
 
 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
-                           X86MemOperand x86memop, X86FoldableSchedWrite sched> {
+                           X86MemOperand x86memop> {
   defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
                    (ins _src.RC:$src1, i32u8imm:$src2),
                    "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                    (X86cvtps2ph (_src.VT _src.RC:$src1),
                                 (i32 imm:$src2)), 0, 0>,
-                   AVX512AIi8Base, Sched<[sched]>;
+                   AVX512AIi8Base, Sched<[WriteCvtF2F]>;
   let hasSideEffects = 0, mayStore = 1 in {
     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-               Sched<[sched.Folded, ReadAfterLd]>;
+               Sched<[WriteCvtF2FSt]>;
     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
-                EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+                EVEX_K, Sched<[WriteCvtF2FSt]>;
   }
 }
 
-multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
-                               X86FoldableSchedWrite sched> {
+multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
   let hasSideEffects = 0 in
   defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                    (outs _dest.RC:$dst),
                    (ins _src.RC:$src1, i32u8imm:$src2),
                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
-                   EVEX_B, AVX512AIi8Base, Sched<[sched]>;
+                   EVEX_B, AVX512AIi8Base, Sched<[WriteCvtF2F]>;
 }
 
 let Predicates = [HasAVX512] in {
-  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, WriteCvtF2F>,
-                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info,
-                                        WriteCvtF2F>, EVEX, EVEX_V512,
-                                        EVEX_CD8<32, CD8VH>;
+  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
+                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
+                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
   let Predicates = [HasVLX] in {
-    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
-                                         WriteCvtF2F>, EVEX, EVEX_V256,
-                                         EVEX_CD8<32, CD8VH>;
-    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
-                                         WriteCvtF2F>, EVEX, EVEX_V128,
-                                         EVEX_CD8<32, CD8VH>;
+    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
+                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
+    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
+                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
   }
 
   def : Pat<(store (f64 (extractelt

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Apr 24 09:43:07 2018
@@ -7283,12 +7283,11 @@ multiclass f16c_ps2ph<RegisterClass RC,
                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
                TAPD, VEX, Sched<[WriteCvtF2F]>;
-  let hasSideEffects = 0, mayStore = 1,
-      SchedRW = [WriteCvtF2FLd, WriteRMW] in
+  let hasSideEffects = 0, mayStore = 1 in
   def mr : Ii8<0x1D, MRMDestMem, (outs),
                (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-               TAPD, VEX;
+               TAPD, VEX, Sched<[WriteCvtF2FSt]>;
 }
 
 let Predicates = [HasF16C, NoVLX] in {

Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Tue Apr 24 09:43:07 2018
@@ -170,6 +170,12 @@ defm : BWWriteResPair<WriteFVarShuffle,
 defm : BWWriteResPair<WriteFBlend,  [BWPort015],  1>; // Floating point vector blends.
 defm : BWWriteResPair<WriteFVarBlend,  [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
 
+def  : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
+  let Latency = 4;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+
 // FMA Scheduling helper class.
 // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
 
@@ -806,8 +812,7 @@ def: InstRW<[BWWriteResGroup44], (instre
                                             "IST_F32m",
                                             "IST_FP16m",
                                             "IST_FP32m",
-                                            "IST_FP64m",
-                                            "VCVTPS2PH(Y?)mr")>;
+                                            "IST_FP64m")>;
 
 def BWWriteResGroup45 : SchedWriteRes<[BWPort0156]> {
   let Latency = 4;

Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Tue Apr 24 09:43:07 2018
@@ -169,6 +169,12 @@ defm : HWWriteResPair<WriteFShuffle256,
 defm : HWWriteResPair<WriteFVarShuffle256,  [HWPort5],  3>;
 defm : HWWriteResPair<WriteFVarBlend,  [HWPort5], 2, [2], 2, 6>;
 
+def  : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+
 // Vector integer operations.
 def  : WriteRes<WriteVecStore,       [HWPort237, HWPort4]>;
 def  : WriteRes<WriteVecLoad,        [HWPort23]> { let Latency = 5; }
@@ -1823,13 +1829,6 @@ def: InstRW<[HWWriteResGroup84], (instre
                                             "VPMASKMOVD(Y?)mr",
                                             "VPMASKMOVQ(Y?)mr")>;
 
-def HWWriteResGroup85 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort237]> {
-  let Latency = 5;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup85], (instregex "VCVTPS2PHmr")>;
-
 def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> {
   let Latency = 10;
   let NumMicroOps = 4;

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Tue Apr 24 09:43:07 2018
@@ -155,6 +155,7 @@ defm : SBWriteResPair<WriteFShuffle, [SB
 defm : SBWriteResPair<WriteFVarShuffle, [SBPort5],  1>;
 defm : SBWriteResPair<WriteFBlend,  [SBPort05],  1>;
 defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
+def  : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; }
 
 // Vector integer operations.
 def  : WriteRes<WriteVecStore,       [SBPort23, SBPort4]>;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Tue Apr 24 09:43:07 2018
@@ -167,6 +167,12 @@ defm : SKLWriteResPair<WriteFVarShuffle,
 defm : SKLWriteResPair<WriteFBlend,  [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
 defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
 
+def  : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+
 // FMA Scheduling helper class.
 // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
 
@@ -1212,13 +1218,6 @@ def SKLWriteResGroup80 : SchedWriteRes<[
 }
 def: InstRW<[SKLWriteResGroup80], (instregex "SLDT(16|32|64)r")>;
 
-def SKLWriteResGroup81 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
-  let Latency = 6;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup81], (instregex "VCVTPS2PHmr")>;
-
 def SKLWriteResGroup82 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
   let Latency = 6;
   let NumMicroOps = 4;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Tue Apr 24 09:43:07 2018
@@ -167,6 +167,12 @@ defm : SKXWriteResPair<WriteFVarShuffle,
 defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
 defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
 
+def  : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+
 // FMA Scheduling helper class.
 // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
 
@@ -2340,13 +2346,6 @@ def SKXWriteResGroup84 : SchedWriteRes<[
 }
 def: InstRW<[SKXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
 
-def SKXWriteResGroup85 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
-  let Latency = 6;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup85], (instregex "VCVTPS2PHmr")>;
-
 def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
   let Latency = 6;
   let NumMicroOps = 4;

Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Tue Apr 24 09:43:07 2018
@@ -131,6 +131,7 @@ def WriteMMXMOVMSK : SchedWrite;
 defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
 defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
 defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
+def  WriteCvtF2FSt : SchedWrite; // Float -> Float size conversion + store.
 
 // CRC32 instruction.
 defm WriteCRC32 : X86SchedWritePair;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Tue Apr 24 09:43:07 2018
@@ -227,6 +227,7 @@ defm : AtomWriteResPair<WriteFVarShuffle
 defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
 defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
 defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
+def  : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
 
 ////////////////////////////////////////////////////////////////////////////////
 // Vector integer operations.

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Tue Apr 24 09:43:07 2018
@@ -321,6 +321,7 @@ defm : JWriteResFpuPair<WriteFVarShuffle
 defm : JWriteResFpuPair<WriteCvtF2I,       [JFPU1, JSTC], 3>; // Float -> Integer.
 defm : JWriteResFpuPair<WriteCvtI2F,       [JFPU1, JSTC], 3>; // Integer -> Float.
 defm : JWriteResFpuPair<WriteCvtF2F,       [JFPU1, JSTC], 3>; // Float -> Float size conversion.
+def  : WriteRes<WriteCvtF2FSt, [JFPU1, JSTC, JSAGU]> { let Latency = 4; }
 
 def JWriteCVTF2F : SchedWriteRes<[JFPU1, JSTC]> {
   let Latency = 7;
@@ -491,11 +492,6 @@ def : InstRW<[JWriteINSERTQ], (instrs IN
 // F16C instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-def JWriteCVT3St: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
-  let Latency = 4;
-}
-def : InstRW<[JWriteCVT3St], (instrs VCVTPS2PHmr)>;
-
 def JWriteCVTPS2PHY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
   let Latency = 6;
   let ResourceCycles = [2, 2, 2];

Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Tue Apr 24 09:43:07 2018
@@ -145,6 +145,7 @@ defm : SLMWriteResPair<WriteFLogic, [SLM
 defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteFBlend,  [SLM_FPC_RSV0],  1>;
+def  : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
 
 // Vector integer operations.
 def  : WriteRes<WriteVecStore,       [SLM_FPC_RSV01, SLM_MEC_RSV]>;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Tue Apr 24 09:43:07 2018
@@ -211,6 +211,7 @@ defm : ZnWriteResFpuPair<WriteFMA,
 defm : ZnWriteResFpuPair<WriteFRcp,      [ZnFPU01], 5>;
 defm : ZnWriteResFpuPair<WriteFRsqrt,    [ZnFPU01], 5>;
 defm : ZnWriteResFpuPair<WriteFSqrt,     [ZnFPU3], 20>;
+def  : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
 
 // Vector integer operations which uses FPU units
 def  : WriteRes<WriteVecStore,            [ZnAGU]>;

Modified: llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/f16c-schedule.ll?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/f16c-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/f16c-schedule.ll Tue Apr 24 09:43:07 2018
@@ -125,13 +125,13 @@ define <8 x i16> @test_vcvtps2ph_128(<4
 ; GENERIC-LABEL: test_vcvtps2ph_128:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
+; GENERIC-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; IVY-LABEL: test_vcvtps2ph_128:
 ; IVY:       # %bb.0:
 ; IVY-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
-; IVY-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
+; IVY-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
 ; IVY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_vcvtps2ph_128:
@@ -175,14 +175,14 @@ define <8 x i16> @test_vcvtps2ph_256(<8
 ; GENERIC-LABEL: test_vcvtps2ph_256:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
+; GENERIC-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; IVY-LABEL: test_vcvtps2ph_256:
 ; IVY:       # %bb.0:
 ; IVY-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
-; IVY-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
+; IVY-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
 ; IVY-NEXT:    vzeroupper # sched: [100:0.33]
 ; IVY-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s?rev=330737&r1=330736&r2=330737&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s Tue Apr 24 09:43:07 2018
@@ -27,9 +27,9 @@ vcvtps2ph   $0, %ymm0, (%rax)
 # CHECK-NEXT:  1      3     1.00                    	vcvtph2ps	%xmm0, %ymm2
 # CHECK-NEXT:  2      8     1.00    *               	vcvtph2ps	(%rax), %ymm2
 # CHECK-NEXT:  1      3     1.00                    	vcvtps2ph	$0, %xmm0, %xmm2
-# CHECK-NEXT:  3      8     1.00           *        	vcvtps2ph	$0, %xmm0, (%rax)
+# CHECK-NEXT:  1      4     1.00           *        	vcvtps2ph	$0, %xmm0, (%rax)
 # CHECK-NEXT:  1      3     1.00                    	vcvtps2ph	$0, %ymm0, %xmm2
-# CHECK-NEXT:  3      8     1.00           *        	vcvtps2ph	$0, %ymm0, (%rax)
+# CHECK-NEXT:  1      4     1.00           *        	vcvtps2ph	$0, %ymm0, (%rax)
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0] - SBDivider
@@ -43,7 +43,7 @@ vcvtps2ph   $0, %ymm0, (%rax)
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -      -      -     8.00   2.00    -     3.00   3.00
+# CHECK-NEXT:  -      -      -     8.00   2.00    -     2.00   2.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  	Instructions:
@@ -52,7 +52,7 @@ vcvtps2ph   $0, %ymm0, (%rax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     	vcvtph2ps	%xmm0, %ymm2
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   	vcvtph2ps	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     	vcvtps2ph	$0, %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -     1.00   1.00   	vcvtps2ph	$0, %xmm0, (%rax)
+# CHECK-NEXT:  -      -      -     1.00   1.00    -     0.50   0.50   	vcvtps2ph	$0, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     	vcvtps2ph	$0, %ymm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -     1.00   1.00   	vcvtps2ph	$0, %ymm0, (%rax)
+# CHECK-NEXT:  -      -      -     1.00   1.00    -     0.50   0.50   	vcvtps2ph	$0, %ymm0, (%rax)
 




More information about the llvm-commits mailing list