[llvm] r334407 - [X86] Fix skylake server scheduling info.
Clement Courbet via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 11 07:37:54 PDT 2018
Author: courbet
Date: Mon Jun 11 07:37:53 2018
New Revision: 334407
URL: http://llvm.org/viewvc/llvm-project?rev=334407&view=rev
Log:
[X86] Fix skylake server scheduling info.
Summary:
This fixes most of the scheduling info for SKX vector operations.
I had to split a lot of the YMM/ZMM classes into separate classes for YMM and ZMM.
The before/after llvm-exegesis analyses are in the Phabricator diff.
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D47721
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/avx-schedule.ll
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll
llvm/trunk/test/CodeGen/X86/fma-schedule.ll
llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
llvm/trunk/test/CodeGen/X86/sha-schedule.ll
llvm/trunk/test/CodeGen/X86/sse-schedule.ll
llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Jun 11 07:37:53 2018
@@ -8014,8 +8014,8 @@ multiclass avx512_cvtph2ps_sae<X86Vector
let Predicates = [HasAVX512] in
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
- WriteCvtPH2PSY>,
- avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSY>,
+ WriteCvtPH2PSZ>,
+ avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
@@ -8068,8 +8068,8 @@ multiclass avx512_cvtps2ph_sae<X86Vector
let Predicates = [HasAVX512] in {
defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
- WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
- avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PH>,
+ WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
+ avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Mon Jun 11 07:37:53 2018
@@ -189,25 +189,31 @@ defm : X86WriteRes<WriteFMoveY, [
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
defm : BWWriteResPair<WriteFAddX, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub (XMM).
defm : BWWriteResPair<WriteFAddY, [BWPort1], 3, [1], 1, 6>; // Floating point add/sub (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : BWWriteResPair<WriteFAdd64, [BWPort1], 3, [1], 1, 5>; // Floating point double add/sub.
defm : BWWriteResPair<WriteFAdd64X, [BWPort1], 3, [1], 1, 5>; // Floating point double add/sub (XMM).
defm : BWWriteResPair<WriteFAdd64Y, [BWPort1], 3, [1], 1, 6>; // Floating point double add/sub (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : BWWriteResPair<WriteFCmp, [BWPort1], 3, [1], 1, 5>; // Floating point compare.
defm : BWWriteResPair<WriteFCmpX, [BWPort1], 3, [1], 1, 5>; // Floating point compare (XMM).
defm : BWWriteResPair<WriteFCmpY, [BWPort1], 3, [1], 1, 6>; // Floating point compare (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : BWWriteResPair<WriteFCmp64, [BWPort1], 3, [1], 1, 5>; // Floating point double compare.
defm : BWWriteResPair<WriteFCmp64X, [BWPort1], 3, [1], 1, 5>; // Floating point double compare (XMM).
defm : BWWriteResPair<WriteFCmp64Y, [BWPort1], 3, [1], 1, 6>; // Floating point double compare (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
defm : BWWriteResPair<WriteFMul, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication.
defm : BWWriteResPair<WriteFMulX, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication (XMM).
defm : BWWriteResPair<WriteFMulY, [BWPort01], 3, [1], 1, 6>; // Floating point multiplication (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : BWWriteResPair<WriteFMul64, [BWPort01], 3, [1], 1, 5>; // Floating point double multiplication.
defm : BWWriteResPair<WriteFMul64X, [BWPort01], 3, [1], 1, 5>; // Floating point double multiplication (XMM).
defm : BWWriteResPair<WriteFMul64Y, [BWPort01], 3, [1], 1, 6>; // Floating point double multiplication (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFMul64Z>;
//defm : BWWriteResPair<WriteFDiv, [BWPort0,BWFPDivider], 11, [1,3], 1, 5>; // Floating point division.
defm : BWWriteResPair<WriteFDivX, [BWPort0,BWFPDivider], 11, [1,5], 1, 5>; // Floating point division (XMM).
@@ -233,34 +239,45 @@ defm : BWWriteResPair<WriteFSqrt80, [BW
defm : BWWriteResPair<WriteFRcp, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate.
defm : BWWriteResPair<WriteFRcpX, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate (XMM).
defm : BWWriteResPair<WriteFRcpY, [BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal estimate (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate.
defm : BWWriteResPair<WriteFRsqrtX,[BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate (XMM).
defm : BWWriteResPair<WriteFRsqrtY,[BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal square root estimate (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : BWWriteResPair<WriteFMA, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add.
defm : BWWriteResPair<WriteFMAX, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (XMM).
defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : BWWriteResPair<WriteDPPD, [BWPort0,BWPort1,BWPort5], 9, [1,1,1], 3, 5>; // Floating point double dot product.
defm : BWWriteResPair<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 5>; // Floating point single dot product.
defm : BWWriteResPair<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product (YMM).
+defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
defm : X86WriteRes<WriteFRnd, [BWPort23], 6, [1], 1>; // Floating point rounding.
defm : X86WriteRes<WriteFRndY, [BWPort23], 6, [1], 1>; // Floating point rounding (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : X86WriteRes<WriteFRndLd, [BWPort1,BWPort23], 11, [2,1], 3>;
defm : X86WriteRes<WriteFRndYLd, [BWPort1,BWPort23], 12, [2,1], 3>;
defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals.
defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : BWWriteResPair<WriteFTest, [BWPort0], 1, [1], 1, 5>; // Floating point TEST instructions.
defm : BWWriteResPair<WriteFTestY, [BWPort0], 1, [1], 1, 6>; // Floating point TEST instructions (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
defm : BWWriteResPair<WriteFShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector shuffles (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFVarShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
+defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1, [1], 1, 5>; // Floating point vector blends.
defm : BWWriteResPair<WriteFBlendY, [BWPort015], 1, [1], 1, 6>; // Floating point vector blends.
+defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
defm : BWWriteResPair<WriteFVarBlendY, [BWPort5], 2, [2], 2, 6>; // Fp vector variable blends.
+defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
@@ -291,31 +308,42 @@ defm : X86WriteRes<WriteEMMS,
defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : BWWriteResPair<WriteVecALUX, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicX,[BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : BWWriteResPair<WriteVecTest, [BWPort0,BWPort5], 2, [1,1], 2, 5>; // Vector integer TEST instructions.
defm : BWWriteResPair<WriteVecTestY, [BWPort0,BWPort5], 4, [1,1], 2, 6>; // Vector integer TEST instructions (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : BWWriteResPair<WriteVecIMulX, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : BWWriteResPair<WriteVecIMulY, [BWPort0], 5, [1], 1, 6>; // Vector integer multiply.
+defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // Vector PMULLD.
defm : BWWriteResPair<WritePMULLDY, [BWPort0], 10, [2], 2, 6>; // Vector PMULLD (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : BWWriteResPair<WriteShuffleX, [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : BWWriteResPair<WriteShuffleY, [BWPort5], 1, [1], 1, 6>; // Vector shuffles (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
defm : BWWriteResPair<WriteVarShuffleX,[BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
defm : BWWriteResPair<WriteVarShuffleY,[BWPort5], 1, [1], 1, 6>; // Vector variable shuffles (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : BWWriteResPair<WriteBlend, [BWPort5], 1, [1], 1, 5>; // Vector blends.
defm : BWWriteResPair<WriteBlendY, [BWPort5], 1, [1], 1, 6>; // Vector blends (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends.
defm : BWWriteResPair<WriteVarBlendY, [BWPort5], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
defm : BWWriteResPair<WriteMPSADY, [BWPort0, BWPort5], 7, [1, 2], 3, 6>; // Vector MPSAD.
+defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : BWWriteResPair<WritePSADBW, [BWPort0], 5, [1], 1, 5>; // Vector PSADBW.
defm : BWWriteResPair<WritePSADBWX, [BWPort0], 5, [1], 1, 5>; // Vector PSADBW.
defm : BWWriteResPair<WritePSADBWY, [BWPort0], 5, [1], 1, 6>; // Vector PSADBW (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS.
// Vector integer shifts.
@@ -323,12 +351,15 @@ defm : BWWriteResPair<WriteVecShift,
defm : BWWriteResPair<WriteVecShiftX, [BWPort0,BWPort5], 2, [1,1], 2, 5>;
defm : X86WriteRes<WriteVecShiftY, [BWPort0,BWPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [BWPort0,BWPort23], 7, [1,1], 2>;
+defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : BWWriteResPair<WriteVecShiftImm, [BWPort0], 1, [1], 1, 5>;
defm : BWWriteResPair<WriteVecShiftImmX, [BWPort0], 1, [1], 1, 5>; // Vector integer immediate shifts (XMM).
defm : BWWriteResPair<WriteVecShiftImmY, [BWPort0], 1, [1], 1, 6>; // Vector integer immediate shifts (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 3, [2,1], 3, 5>; // Variable vector shifts.
defm : BWWriteResPair<WriteVarVecShiftY, [BWPort0, BWPort5], 3, [2,1], 3, 6>; // Variable vector shifts (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [BWPort5]> {
@@ -354,33 +385,43 @@ def : WriteRes<WriteVecExtractSt, [BWPor
defm : BWWriteResPair<WriteCvtSS2I, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPS2I, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPS2IY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : BWWriteResPair<WriteCvtSD2I, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPD2I, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : BWWriteResPair<WriteCvtI2SS, [BWPort1], 4>;
defm : BWWriteResPair<WriteCvtI2PS, [BWPort1], 4>;
defm : BWWriteResPair<WriteCvtI2PSY, [BWPort1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : BWWriteResPair<WriteCvtI2SD, [BWPort1], 4>;
defm : BWWriteResPair<WriteCvtI2PD, [BWPort1], 4>;
defm : BWWriteResPair<WriteCvtI2PDY, [BWPort1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : BWWriteResPair<WriteCvtSS2SD, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPS2PD, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [BWPort1,BWPort5], 6, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [BWPort1,BWPort4,BWPort237], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [BWPort1,BWPort4,BWPort237], 7, [1,1,1], 3>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
// Strings instructions.
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Mon Jun 11 07:37:53 2018
@@ -185,25 +185,31 @@ defm : X86WriteRes<WriteEMMS, [
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>;
defm : HWWriteResPair<WriteFAddX, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFAddY, [HWPort1], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteFAddZ, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFAdd64, [HWPort1], 3, [1], 1, 5>;
defm : HWWriteResPair<WriteFAdd64X, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFAdd64Y, [HWPort1], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteFAdd64Z, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFCmp, [HWPort1], 3, [1], 1, 5>;
defm : HWWriteResPair<WriteFCmpX, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFCmpY, [HWPort1], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteFCmpZ, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFCmp64, [HWPort1], 3, [1], 1, 5>;
defm : HWWriteResPair<WriteFCmp64X, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFCmp64Y, [HWPort1], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteFCmp64Z, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMul, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMulX, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMulY, [HWPort01], 5, [1], 1, 7>;
+defm : HWWriteResPair<WriteFMulZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFMul64, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMul64X, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMul64Y, [HWPort01], 5, [1], 1, 7>;
+defm : HWWriteResPair<WriteFMul64Z, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFDiv, [HWPort0,HWFPDivider], 13, [1,7], 1, 5>;
defm : HWWriteResPair<WriteFDivX, [HWPort0,HWFPDivider], 13, [1,7], 1, 6>;
@@ -217,10 +223,12 @@ defm : HWWriteResPair<WriteFDiv64Z, [HWP
defm : HWWriteResPair<WriteFRcp, [HWPort0], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFRcpX, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFRcpY, [HWPort0,HWPort015], 11, [2,1], 3, 7>;
+defm : HWWriteResPair<WriteFRcpZ, [HWPort0,HWPort015], 11, [2,1], 3, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFRsqrt, [HWPort0], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFRsqrtX,[HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFRsqrtY,[HWPort0,HWPort015], 11, [2,1], 3, 7>;
+defm : HWWriteResPair<WriteFRsqrtZ,[HWPort0,HWPort015], 11, [2,1], 3, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFSqrt, [HWPort0,HWFPDivider], 11, [1,7], 1, 5>;
defm : HWWriteResPair<WriteFSqrtX, [HWPort0,HWFPDivider], 11, [1,7], 1, 6>;
@@ -235,60 +243,80 @@ defm : HWWriteResPair<WriteFSqrt80, [HW
defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
+defm : HWWriteResPair<WriteFMAZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>;
defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
+defm : HWWriteResPair<WriteDPPSZ, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
defm : X86WriteRes<WriteFRnd, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFRndY, [HWPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteFRndZ, [HWPort23], 6, [1], 1>; // Unsupported = 1
defm : X86WriteRes<WriteFRndLd, [HWPort1,HWPort23], 12, [2,1], 3>;
defm : X86WriteRes<WriteFRndYLd, [HWPort1,HWPort23], 13, [2,1], 3>;
+defm : X86WriteRes<WriteFRndZLd, [HWPort1,HWPort23], 13, [2,1], 3>; // Unsupported = 1
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteFLogicZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFTest, [HWPort0], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFTestY, [HWPort0], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteFTestZ, [HWPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFShuffleY, [HWPort5], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteFShuffleZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFVarShuffleY, [HWPort5], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteFVarShuffleZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFBlendY, [HWPort015], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteFBlendZ, [HWPort015], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
+defm : HWWriteResPair<WriteFVarBlendZ, [HWPort5], 2, [2], 2, 7>; // Unsupported = 1
// Conversion between integer and float.
defm : HWWriteResPair<WriteCvtSD2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2IY, [HWPort1], 3>;
+defm : HWWriteResPair<WriteCvtPD2IZ, [HWPort1], 3>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtSS2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2IY, [HWPort1], 3>;
+defm : HWWriteResPair<WriteCvtPS2IZ, [HWPort1], 3>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtI2SD, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtI2PD, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtI2PDY, [HWPort1], 4>;
+defm : HWWriteResPair<WriteCvtI2PDZ, [HWPort1], 4>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtI2SS, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtI2PS, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtI2PSY, [HWPort1], 4>;
+defm : HWWriteResPair<WriteCvtI2PSZ, [HWPort1], 4>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtSS2SD, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2PD, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort1], 3>;
+defm : HWWriteResPair<WriteCvtPS2PDZ, [HWPort1], 3>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtSD2SS, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2PS, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1], 3>;
+defm : HWWriteResPair<WriteCvtPD2PSZ, [HWPort1], 3>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPH2PS, [HWPort0,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [HWPort0,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZ, [HWPort0,HWPort5], 2, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort0,HWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort0,HWPort23], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZLd, [HWPort0,HWPort23], 7, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PH, [HWPort1,HWPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [HWPort1,HWPort5], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHZ, [HWPort1,HWPort5], 6, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PHSt, [HWPort1,HWPort4,HWPort5,HWPort237], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [HWPort1,HWPort4,HWPort5,HWPort237], 7, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHZSt, [HWPort1,HWPort4,HWPort5,HWPort237], 7, [1,1,1,1], 4>; // Unsupported = 1
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
@@ -314,46 +342,61 @@ defm : X86WriteRes<WriteVecMoveFromGpr,
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVecLogicX,[HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteVecLogicZ,[HWPort015], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteVecTest, [HWPort0,HWPort5], 2, [1,1], 2, 6>;
defm : HWWriteResPair<WriteVecTestY, [HWPort0,HWPort5], 4, [1,1], 2, 7>;
+defm : HWWriteResPair<WriteVecTestZ, [HWPort0,HWPort5], 4, [1,1], 2, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteVecALU, [HWPort15], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVecALUX, [HWPort15], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecALUY, [HWPort15], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteVecALUZ, [HWPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteVecIMulX, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteVecIMulY, [HWPort0], 5, [1], 1, 7>;
+defm : HWWriteResPair<WriteVecIMulZ, [HWPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
defm : HWWriteResPair<WritePMULLDY, [HWPort0], 10, [2], 2, 7>;
+defm : HWWriteResPair<WritePMULLDZ, [HWPort0], 10, [2], 2, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteShuffleX, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteShuffleY, [HWPort5], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteShuffleZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVarShuffleX,[HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVarShuffleY,[HWPort5], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteVarShuffleZ,[HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteBlend, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteBlendY, [HWPort5], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteBlendZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteVarBlendY, [HWPort5], 2, [2], 2, 7>;
+defm : HWWriteResPair<WriteVarBlendZ, [HWPort5], 2, [2], 2, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
defm : HWWriteResPair<WriteMPSADY, [HWPort0, HWPort5], 7, [1, 2], 3, 7>;
+defm : HWWriteResPair<WriteMPSADZ, [HWPort0, HWPort5], 7, [1, 2], 3, 7>; // Unsupported = 1
defm : HWWriteResPair<WritePSADBW, [HWPort0], 5, [1], 1, 5>;
defm : HWWriteResPair<WritePSADBWX, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WritePSADBWY, [HWPort0], 5, [1], 1, 7>;
+defm : HWWriteResPair<WritePSADBWZ, [HWPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WritePHMINPOS, [HWPort0], 5, [1], 1, 6>;
// Vector integer shifts.
defm : HWWriteResPair<WriteVecShift, [HWPort0], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVecShiftX, [HWPort0,HWPort5], 2, [1,1], 2, 6>;
defm : X86WriteRes<WriteVecShiftY, [HWPort0,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftZ, [HWPort0,HWPort5], 4, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteVecShiftYLd, [HWPort0,HWPort23], 8, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftZLd, [HWPort0,HWPort23], 8, [1,1], 2>; // Unsupported = 1
defm : HWWriteResPair<WriteVecShiftImm, [HWPort0], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVecShiftImmX, [HWPort0], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecShiftImmY, [HWPort0], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteVecShiftImmZ, [HWPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 3, [2,1], 3, 6>;
defm : HWWriteResPair<WriteVarVecShiftY, [HWPort0, HWPort5], 3, [2,1], 3, 7>;
+defm : HWWriteResPair<WriteVarVecShiftZ, [HWPort0, HWPort5], 3, [2,1], 3, 7>; // Unsupported = 1
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [HWPort5]> {
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Mon Jun 11 07:37:53 2018
@@ -176,25 +176,31 @@ defm : X86WriteRes<WriteEMMS, [
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddX, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddY, [SBPort1], 3, [1], 1, 7>;
+defm : SBWriteResPair<WriteFAddZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFAdd64, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAdd64X, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAdd64Y, [SBPort1], 3, [1], 1, 7>;
+defm : SBWriteResPair<WriteFAdd64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmpX, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmpY, [SBPort1], 3, [1], 1, 7>;
+defm : SBWriteResPair<WriteFCmpZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFCmp64, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmp64X, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmp64Y, [SBPort1], 3, [1], 1, 7>;
+defm : SBWriteResPair<WriteFCmp64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMulX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMulY, [SBPort0], 5, [1], 1, 7>;
+defm : SBWriteResPair<WriteFMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFMul64, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMul64X, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMul64Y, [SBPort0], 5, [1], 1, 7>;
+defm : SBWriteResPair<WriteFMul64Z, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFDiv, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFDivX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
@@ -208,10 +214,12 @@ defm : SBWriteResPair<WriteFDiv64Z, [SBP
defm : SBWriteResPair<WriteFRcp, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRcpX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRcpY, [SBPort0,SBPort05], 7, [2,1], 3, 7>;
+defm : SBWriteResPair<WriteFRcpZ, [SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFRsqrt, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRsqrtX,[SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRsqrtY,[SBPort0,SBPort05], 7, [2,1], 3, 7>;
+defm : SBWriteResPair<WriteFRsqrtZ,[SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFSqrt, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFSqrtX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
@@ -226,58 +234,78 @@ defm : SBWriteResPair<WriteFSqrt80, [SB
defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>;
defm : SBWriteResPair<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>;
defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>;
+defm : SBWriteResPair<WriteDPPSZ, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
+defm : SBWriteResPair<WriteFRndZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteFLogicZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFTest, [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFTestY, [SBPort0], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteFTestZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFShuffleY,[SBPort5], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteFShuffleZ,[SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteFVarShuffleZ,[SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteFBlendZ, [SBPort05], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
+defm : SBWriteResPair<WriteFVarBlendZ,[SBPort05], 2, [2], 2, 7>; // Unsupported = 1
// Conversion between integer and float.
defm : SBWriteResPair<WriteCvtSS2I, [SBPort0,SBPort1], 5, [1,1], 2>;
defm : SBWriteResPair<WriteCvtPS2I, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteCvtPS2IY, [SBPort1], 3, [1], 1, 7>;
+defm : SBWriteResPair<WriteCvtPS2IZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtSD2I, [SBPort0,SBPort1], 5, [1,1], 2>;
defm : SBWriteResPair<WriteCvtPD2I, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : X86WriteRes<WriteCvtPD2IY, [SBPort1,SBPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPD2IZ, [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPD2IYLd, [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPD2IZLd, [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SS, [SBPort1,SBPort5], 5, [1,2], 3>;
defm : X86WriteRes<WriteCvtI2SSLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : SBWriteResPair<WriteCvtI2PS, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteCvtI2PSY, [SBPort1], 3, [1], 1, 7>;
+defm : SBWriteResPair<WriteCvtI2PSZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SD, [SBPort1,SBPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PD, [SBPort1,SBPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PDY, [SBPort1,SBPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDZ, [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SDLd, [SBPort1,SBPort23], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PDLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtI2PDYLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtI2PDZLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtSS2SD, [SBPort0], 1, [1], 1, 6>;
defm : X86WriteRes<WriteCvtPS2PD, [SBPort0,SBPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDY, [SBPort0,SBPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDZ, [SBPort0,SBPort5], 2, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PDLd, [SBPort0,SBPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort23], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort23], 7, [1,1], 2>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtSD2SS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
+defm : SBWriteResPair<WriteCvtPD2PSZ, [SBPort1,SBPort5], 4, [1,1], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
+defm : SBWriteResPair<WriteCvtPH2PSZ, [SBPort1], 3>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHZ, [SBPort1], 3, [1], 1>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>; // Unsupported = 1
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
@@ -303,42 +331,56 @@ defm : X86WriteRes<WriteVecMoveFromGpr,
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecLogicX,[SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteVecLogicZ,[SBPort015], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecTest, [SBPort0,SBPort5], 2, [1,1], 2, 6>;
defm : SBWriteResPair<WriteVecTestY, [SBPort0,SBPort5], 2, [1,1], 2, 7>;
+defm : SBWriteResPair<WriteVecTestZ, [SBPort0,SBPort5], 2, [1,1], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecALU, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteVecALUX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecALUY, [SBPort15], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteVecALUZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5, [1], 1, 5>;
defm : SBWriteResPair<WriteVecIMulX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
+defm : SBWriteResPair<WriteVecIMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePMULLDY, [SBPort0], 5, [1], 1, 7>; // TODO this is probably wrong for 256/512-bit for the "generic" model
+defm : SBWriteResPair<WritePMULLDZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteShuffleX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteShuffleZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVarShuffleX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVarShuffleY, [SBPort15], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteVarShuffleZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteBlend, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteBlendY, [SBPort15], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteBlendZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteVarBlendY,[SBPort15], 2, [2], 2, 7>;
+defm : SBWriteResPair<WriteVarBlendZ,[SBPort15], 2, [2], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
defm : SBWriteResPair<WriteMPSADY, [SBPort0, SBPort15], 7, [1,2], 3, 7>;
+defm : SBWriteResPair<WriteMPSADZ, [SBPort0, SBPort15], 7, [1,2], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePSADBW, [SBPort0], 5, [1], 1, 5>;
defm : SBWriteResPair<WritePSADBWX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>;
+defm : SBWriteResPair<WritePSADBWZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>;
// Vector integer shifts.
defm : SBWriteResPair<WriteVecShift, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecShiftX, [SBPort0,SBPort15], 2, [1,1], 2, 6>;
defm : SBWriteResPair<WriteVecShiftY, [SBPort0,SBPort15], 4, [1,1], 2, 7>;
+defm : SBWriteResPair<WriteVecShiftZ, [SBPort0,SBPort15], 4, [1,1], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecShiftImm, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecShiftImmX, [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecShiftImmY, [SBPort0], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteVecShiftImmZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVarVecShiftY, [SBPort0], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteVarVecShiftZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> {
@@ -365,9 +407,11 @@ def : WriteRes<WriteVecExtractSt, [SBPor
defm : SBWriteResPair<WriteFHAdd, [SBPort1,SBPort5], 5, [1,2], 3, 6>;
defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
+defm : SBWriteResPair<WriteFHAddZ, [SBPort1,SBPort5], 5, [1,2], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 5>;
defm : SBWriteResPair<WritePHAddX, [SBPort15], 3, [3], 3, 6>;
defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>;
+defm : SBWriteResPair<WritePHAddZ, [SBPort15], 3, [3], 3, 7>; // Unsupported = 1
////////////////////////////////////////////////////////////////////////////////
// String instructions.
@@ -484,6 +528,7 @@ defm : SBWriteResPair<WriteVarShuffle256
defm : SBWriteResPair<WriteFMA, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAX, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAY, [SBPort01], 5>;
+defm : SBWriteResPair<WriteFMAZ, [SBPort01], 5>; // Unsupported = 1
// Remaining SNB instrs.
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Mon Jun 11 07:37:53 2018
@@ -183,76 +183,93 @@ defm : X86WriteRes<WriteFMoveY, [
defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 5>; // Floating point add/sub.
-defm : SKLWriteResPair<WriteFAddX, [SKLPort01], 4, [1], 1, 6>; // Floating point add/sub (XMM).
-defm : SKLWriteResPair<WriteFAddY, [SKLPort01], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
+defm : SKLWriteResPair<WriteFAddX, [SKLPort01], 4, [1], 1, 6>;
+defm : SKLWriteResPair<WriteFAddY, [SKLPort01], 4, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : SKLWriteResPair<WriteFAdd64, [SKLPort01], 4, [1], 1, 5>; // Floating point double add/sub.
-defm : SKLWriteResPair<WriteFAdd64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double add/sub (XMM).
-defm : SKLWriteResPair<WriteFAdd64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double add/sub (YMM/ZMM).
+defm : SKLWriteResPair<WriteFAdd64X, [SKLPort01], 4, [1], 1, 6>;
+defm : SKLWriteResPair<WriteFAdd64Y, [SKLPort01], 4, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : SKLWriteResPair<WriteFCmp, [SKLPort01], 4, [1], 1, 5>; // Floating point compare.
-defm : SKLWriteResPair<WriteFCmpX, [SKLPort01], 4, [1], 1, 6>; // Floating point compare (XMM).
-defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM).
+defm : SKLWriteResPair<WriteFCmpX, [SKLPort01], 4, [1], 1, 6>;
+defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : SKLWriteResPair<WriteFCmp64, [SKLPort01], 4, [1], 1, 5>; // Floating point double compare.
-defm : SKLWriteResPair<WriteFCmp64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double compare (XMM).
-defm : SKLWriteResPair<WriteFCmp64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double compare (YMM/ZMM).
+defm : SKLWriteResPair<WriteFCmp64X, [SKLPort01], 4, [1], 1, 6>;
+defm : SKLWriteResPair<WriteFCmp64Y, [SKLPort01], 4, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
defm : SKLWriteResPair<WriteFMul, [SKLPort01], 4, [1], 1, 5>; // Floating point multiplication.
-defm : SKLWriteResPair<WriteFMulX, [SKLPort01], 4, [1], 1, 6>; // Floating point multiplication (XMM).
-defm : SKLWriteResPair<WriteFMulY, [SKLPort01], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
+defm : SKLWriteResPair<WriteFMulX, [SKLPort01], 4, [1], 1, 6>;
+defm : SKLWriteResPair<WriteFMulY, [SKLPort01], 4, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : SKLWriteResPair<WriteFMul64, [SKLPort01], 4, [1], 1, 5>; // Floating point double multiplication.
-defm : SKLWriteResPair<WriteFMul64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double multiplication (XMM).
-defm : SKLWriteResPair<WriteFMul64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double multiplication (YMM/ZMM).
+defm : SKLWriteResPair<WriteFMul64X, [SKLPort01], 4, [1], 1, 6>;
+defm : SKLWriteResPair<WriteFMul64Y, [SKLPort01], 4, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : SKLWriteResPair<WriteFDiv, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 5>; // Floating point division.
-//defm : SKLWriteResPair<WriteFDivX, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>; // Floating point division (XMM).
-defm : SKLWriteResPair<WriteFDivY, [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>; // Floating point division (YMM).
+//defm : SKLWriteResPair<WriteFDivX, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>;
+defm : SKLWriteResPair<WriteFDivY, [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
//defm : SKLWriteResPair<WriteFDiv64, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 5>; // Floating point double division.
-//defm : SKLWriteResPair<WriteFDiv64X, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 6>; // Floating point double division (XMM).
-//defm : SKLWriteResPair<WriteFDiv64Y, [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>; // Floating point double division (YMM).
+//defm : SKLWriteResPair<WriteFDiv64X, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 6>;
+//defm : SKLWriteResPair<WriteFDiv64Y, [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : SKLWriteResPair<WriteFSqrt, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
-defm : SKLWriteResPair<WriteFSqrtX, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
-defm : SKLWriteResPair<WriteFSqrtY, [SKLPort0,SKLFPDivider], 12, [1,6], 1, 7>; // Floating point square root (YMM).
+defm : SKLWriteResPair<WriteFSqrtX, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 6>;
+defm : SKLWriteResPair<WriteFSqrtY, [SKLPort0,SKLFPDivider], 12, [1,6], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : SKLWriteResPair<WriteFSqrt64, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
-defm : SKLWriteResPair<WriteFSqrt64X, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 6>; // Floating point double square root (XMM).
-defm : SKLWriteResPair<WriteFSqrt64Y, [SKLPort0,SKLFPDivider], 18, [1,12],1, 7>; // Floating point double square root (YMM).
+defm : SKLWriteResPair<WriteFSqrt64X, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 6>;
+defm : SKLWriteResPair<WriteFSqrt64Y, [SKLPort0,SKLFPDivider], 18, [1,12],1, 7>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SKLWriteResPair<WriteFSqrt80, [SKLPort0,SKLFPDivider], 21, [1,7]>; // Floating point long double square root.
defm : SKLWriteResPair<WriteFRcp, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
-defm : SKLWriteResPair<WriteFRcpX, [SKLPort0], 4, [1], 1, 6>; // Floating point reciprocal estimate (XMM).
-defm : SKLWriteResPair<WriteFRcpY, [SKLPort0], 4, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
+defm : SKLWriteResPair<WriteFRcpX, [SKLPort0], 4, [1], 1, 6>;
+defm : SKLWriteResPair<WriteFRcpY, [SKLPort0], 4, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : SKLWriteResPair<WriteFRsqrt, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
-defm : SKLWriteResPair<WriteFRsqrtX,[SKLPort0], 4, [1], 1, 6>; // Floating point reciprocal square root estimate (XMM).
-defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
+defm : SKLWriteResPair<WriteFRsqrtX,[SKLPort0], 4, [1], 1, 6>;
+defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4, [1], 1, 5>; // Fused Multiply Add.
-defm : SKLWriteResPair<WriteFMAX, [SKLPort01], 4, [1], 1, 6>; // Fused Multiply Add (XMM).
-defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
+defm : SKLWriteResPair<WriteFMAX, [SKLPort01], 4, [1], 1, 6>;
+defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : SKLWriteResPair<WriteDPPD, [SKLPort5,SKLPort01], 9, [1,2], 3, 6>; // Floating point double dot product.
-defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>; // Floating point single dot product.
-defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
+defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>;
+defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>;
+defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
defm : SKLWriteResPair<WriteFRnd, [SKLPort01], 8, [2], 2, 6>; // Floating point rounding.
-defm : SKLWriteResPair<WriteFRndY, [SKLPort01], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM).
+defm : SKLWriteResPair<WriteFRndY, [SKLPort01], 8, [2], 2, 7>;
+defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
-defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
+defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : SKLWriteResPair<WriteFTest, [SKLPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
-defm : SKLWriteResPair<WriteFTestY, [SKLPort0], 2, [1], 1, 7>; // Floating point TEST instructions (YMM/ZMM).
+defm : SKLWriteResPair<WriteFTestY, [SKLPort0], 2, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
-defm : SKLWriteResPair<WriteFShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
+defm : SKLWriteResPair<WriteFShuffleY, [SKLPort5], 1, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
-defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
-defm : SKLWriteResPair<WriteFBlendY, [SKLPort015], 1, [1], 1, 7>; // Floating point vector blends.
+defm : SKLWriteResPair<WriteFBlendY, [SKLPort015], 1, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
-defm : SKLWriteResPair<WriteFVarBlendY,[SKLPort015], 2, [2], 2, 7>; // Fp vector variable blends.
+defm : SKLWriteResPair<WriteFVarBlendY,[SKLPort015], 2, [2], 2, 7>;
+defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
@@ -279,33 +296,44 @@ defm : X86WriteRes<WriteVecMoveToGpr,
defm : X86WriteRes<WriteVecMoveFromGpr, [SKLPort5], 1, [1], 1>;
defm : SKLWriteResPair<WriteVecALU, [SKLPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
-defm : SKLWriteResPair<WriteVecALUX, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM).
-defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
+defm : SKLWriteResPair<WriteVecALUX, [SKLPort01], 1, [1], 1, 6>;
+defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SKLWriteResPair<WriteVecLogic, [SKLPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
-defm : SKLWriteResPair<WriteVecLogicX,[SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (XMM).
-defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
+defm : SKLWriteResPair<WriteVecLogicX,[SKLPort015], 1, [1], 1, 6>;
+defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : SKLWriteResPair<WriteVecTest, [SKLPort0,SKLPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
-defm : SKLWriteResPair<WriteVecTestY, [SKLPort0,SKLPort5], 3, [1,1], 2, 7>; // Vector integer TEST instructions (YMM/ZMM).
+defm : SKLWriteResPair<WriteVecTestY, [SKLPort0,SKLPort5], 3, [1,1], 2, 7>;
+defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0] , 4, [1], 1, 5>; // Vector integer multiply.
-defm : SKLWriteResPair<WriteVecIMulX, [SKLPort01], 4, [1], 1, 6>; // Vector integer multiply (XMM).
-defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
+defm : SKLWriteResPair<WriteVecIMulX, [SKLPort01], 4, [1], 1, 6>;
+defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD.
-defm : SKLWriteResPair<WritePMULLDY, [SKLPort01], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
+defm : SKLWriteResPair<WritePMULLDY, [SKLPort01], 10, [2], 2, 7>;
+defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
-defm : SKLWriteResPair<WriteShuffleX, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
-defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
+defm : SKLWriteResPair<WriteShuffleX, [SKLPort5], 1, [1], 1, 6>;
+defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
-defm : SKLWriteResPair<WriteVarShuffleX, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
-defm : SKLWriteResPair<WriteVarShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
+defm : SKLWriteResPair<WriteVarShuffleX, [SKLPort5], 1, [1], 1, 6>;
+defm : SKLWriteResPair<WriteVarShuffleY, [SKLPort5], 1, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : SKLWriteResPair<WriteBlend, [SKLPort5], 1, [1], 1, 6>; // Vector blends.
-defm : SKLWriteResPair<WriteBlendY, [SKLPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM).
+defm : SKLWriteResPair<WriteBlendY, [SKLPort5], 1, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.
-defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
+defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>;
+defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
-defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>; // Vector MPSAD (YMM/ZMM).
+defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>;
+defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3, [1], 1, 5>; // Vector PSADBW.
-defm : SKLWriteResPair<WritePSADBWX, [SKLPort5], 3, [1], 1, 6>; // Vector PSADBW (XMM).
-defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW (YMM/ZMM).
+defm : SKLWriteResPair<WritePSADBWX, [SKLPort5], 3, [1], 1, 6>;
+defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS.
// Vector integer shifts.
@@ -314,12 +342,15 @@ defm : X86WriteRes<WriteVecShiftX,
defm : X86WriteRes<WriteVecShiftY, [SKLPort5,SKLPort01], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [SKLPort01,SKLPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [SKLPort01,SKLPort23], 8, [1,1], 2>;
+defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
-defm : SKLWriteResPair<WriteVecShiftImm, [SKLPort0], 1, [1], 1, 5>;
-defm : SKLWriteResPair<WriteVecShiftImmX, [SKLPort01], 1, [1], 1, 6>; // Vector integer immediate shifts (XMM).
-defm : SKLWriteResPair<WriteVecShiftImmY, [SKLPort01], 1, [1], 1, 7>; // Vector integer immediate shifts (YMM/ZMM).
+defm : SKLWriteResPair<WriteVecShiftImm, [SKLPort0], 1, [1], 1, 5>; // Vector integer immediate shifts.
+defm : SKLWriteResPair<WriteVecShiftImmX, [SKLPort01], 1, [1], 1, 6>;
+defm : SKLWriteResPair<WriteVecShiftImmY, [SKLPort01], 1, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : SKLWriteResPair<WriteVarVecShift, [SKLPort01], 1, [1], 1, 6>; // Variable vector shifts.
-defm : SKLWriteResPair<WriteVarVecShiftY, [SKLPort01], 1, [1], 1, 7>; // Variable vector shifts (YMM/ZMM).
+defm : SKLWriteResPair<WriteVarVecShiftY, [SKLPort01], 1, [1], 1, 7>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SKLPort5]> {
@@ -346,33 +377,43 @@ def : WriteRes<WriteVecExtractSt, [SKLPo
defm : SKLWriteResPair<WriteCvtSS2I, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPS2I, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPS2IY, [SKLPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : SKLWriteResPair<WriteCvtSD2I, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPD2I, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPD2IY, [SKLPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : SKLWriteResPair<WriteCvtI2SS, [SKLPort1], 4>;
defm : SKLWriteResPair<WriteCvtI2PS, [SKLPort1], 4>;
defm : SKLWriteResPair<WriteCvtI2PSY, [SKLPort1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : SKLWriteResPair<WriteCvtI2SD, [SKLPort1], 4>;
defm : SKLWriteResPair<WriteCvtI2PD, [SKLPort1], 4>;
defm : SKLWriteResPair<WriteCvtI2PDY, [SKLPort1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : SKLWriteResPair<WriteCvtSS2SD, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPS2PD, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPS2PDY, [SKLPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : SKLWriteResPair<WriteCvtSD2SS, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPD2PS, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPD2PSY, [SKLPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort015], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPH2PSLd, [SKLPort23,SKLPort01], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
defm : X86WriteRes<WriteCvtPS2PH, [SKLPort5,SKLPort015], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 6, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 8, [1,1,1,1], 4>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
// Strings instructions.
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Mon Jun 11 07:37:53 2018
@@ -182,77 +182,94 @@ defm : X86WriteRes<WriteFMoveX, [
defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
-defm : SKXWriteResPair<WriteFAdd, [SKXPort015], 4, [1], 1, 5>; // Floating point add/sub.
-defm : SKXWriteResPair<WriteFAddX, [SKXPort015], 4, [1], 1, 6>; // Floating point add/sub (XMM).
-defm : SKXWriteResPair<WriteFAddY, [SKXPort015], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
-defm : SKXWriteResPair<WriteFAdd64, [SKXPort015], 4, [1], 1, 5>; // Floating point double add/sub.
-defm : SKXWriteResPair<WriteFAdd64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double add/sub (XMM).
-defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double add/sub (YMM/ZMM).
-
-defm : SKXWriteResPair<WriteFCmp, [SKXPort015], 4, [1], 1, 5>; // Floating point compare.
-defm : SKXWriteResPair<WriteFCmpX, [SKXPort015], 4, [1], 1, 6>; // Floating point compare (XMM).
-defm : SKXWriteResPair<WriteFCmpY, [SKXPort015], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM).
-defm : SKXWriteResPair<WriteFCmp64, [SKXPort015], 4, [1], 1, 5>; // Floating point double compare.
-defm : SKXWriteResPair<WriteFCmp64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double compare (XMM).
-defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double compare (YMM/ZMM).
+defm : SKXWriteResPair<WriteFAdd, [SKXPort01], 4, [1], 1, 5>; // Floating point add/sub.
+defm : SKXWriteResPair<WriteFAddX, [SKXPort01], 4, [1], 1, 6>;
+defm : SKXWriteResPair<WriteFAddY, [SKXPort01], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFAddZ, [SKXPort05], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFAdd64, [SKXPort01], 4, [1], 1, 5>; // Floating point double add/sub.
+defm : SKXWriteResPair<WriteFAdd64X, [SKXPort01], 4, [1], 1, 6>;
+defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort01], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFAdd64Z, [SKXPort05], 4, [1], 1, 7>;
+
+defm : SKXWriteResPair<WriteFCmp, [SKXPort01], 4, [1], 1, 5>; // Floating point compare.
+defm : SKXWriteResPair<WriteFCmpX, [SKXPort01], 4, [1], 1, 6>;
+defm : SKXWriteResPair<WriteFCmpY, [SKXPort01], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFCmpZ, [SKXPort05], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFCmp64, [SKXPort01], 4, [1], 1, 5>; // Floating point double compare.
+defm : SKXWriteResPair<WriteFCmp64X, [SKXPort01], 4, [1], 1, 6>;
+defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort01], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFCmp64Z, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
-defm : SKXWriteResPair<WriteFMul, [SKXPort015], 4, [1], 1, 5>; // Floating point multiplication.
-defm : SKXWriteResPair<WriteFMulX, [SKXPort015], 4, [1], 1, 6>; // Floating point multiplication (XMM).
-defm : SKXWriteResPair<WriteFMulY, [SKXPort015], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
-defm : SKXWriteResPair<WriteFMul64, [SKXPort015], 4, [1], 1, 5>; // Floating point double multiplication.
-defm : SKXWriteResPair<WriteFMul64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double multiplication (XMM).
-defm : SKXWriteResPair<WriteFMul64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double multiplication (YMM/ZMM).
+defm : SKXWriteResPair<WriteFMul, [SKXPort01], 4, [1], 1, 5>; // Floating point multiplication.
+defm : SKXWriteResPair<WriteFMulX, [SKXPort01], 4, [1], 1, 6>;
+defm : SKXWriteResPair<WriteFMulY, [SKXPort01], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFMulZ, [SKXPort05], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFMul64, [SKXPort01], 4, [1], 1, 5>; // Floating point double multiplication.
+defm : SKXWriteResPair<WriteFMul64X, [SKXPort01], 4, [1], 1, 6>;
+defm : SKXWriteResPair<WriteFMul64Y, [SKXPort01], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFMul64Z, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFDiv, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
-//defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles. // Floating point division (XMM).
-defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles. // Floating point division (YMM).
-defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles. // Floating point division (ZMM).
+//defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
+defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
+defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles.
//defm : SKXWriteResPair<WriteFDiv64, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
-//defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles. // Floating point division (XMM).
-//defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles. // Floating point division (YMM).
-defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles. // Floating point division (ZMM).
+//defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles.
+//defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles.
+defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
-defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
-defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>; // Floating point square root (YMM).
-defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>; // Floating point square root (ZMM).
+defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>;
+defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>;
+defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>;
defm : SKXWriteResPair<WriteFSqrt64, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
-defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>; // Floating point double square root (XMM).
-defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>; // Floating point double square root (YMM).
-defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>; // Floating point double square root (ZMM).
+defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>;
+defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>;
+defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>;
defm : SKXWriteResPair<WriteFSqrt80, [SKXPort0,SKXFPDivider], 21, [1,7]>; // Floating point long double square root.
defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
-defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal estimate (XMM).
-defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
+defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>;
+defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFRcpZ, [SKXPort0,SKXPort5], 4, [2,1], 3, 7>;
defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
-defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal square root estimate (XMM).
-defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
-
-defm : SKXWriteResPair<WriteFMA, [SKXPort015], 4, [1], 1, 5>; // Fused Multiply Add.
-defm : SKXWriteResPair<WriteFMAX, [SKXPort015], 4, [1], 1, 6>; // Fused Multiply Add (XMM).
-defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
+defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0], 4, [1], 1, 6>;
+defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFRsqrtZ,[SKXPort0,SKXPort5], 9, [2,1], 3, 7>;
+
+defm : SKXWriteResPair<WriteFMA, [SKXPort01], 4, [1], 1, 5>; // Fused Multiply Add.
+defm : SKXWriteResPair<WriteFMAX, [SKXPort01], 4, [1], 1, 6>;
+defm : SKXWriteResPair<WriteFMAY, [SKXPort01], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFMAZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
-defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>; // Floating point single dot product.
-defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
+defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>;
+defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>;
+defm : SKXWriteResPair<WriteDPPSZ,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>;
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
-defm : SKXWriteResPair<WriteFRnd, [SKXPort015], 8, [2], 2, 6>; // Floating point rounding.
-defm : SKXWriteResPair<WriteFRndY, [SKXPort015], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM).
+defm : SKXWriteResPair<WriteFRnd, [SKXPort01], 8, [2], 2, 6>; // Floating point rounding.
+defm : SKXWriteResPair<WriteFRndY, [SKXPort01], 8, [2], 2, 7>;
+defm : SKXWriteResPair<WriteFRndZ, [SKXPort05], 8, [2], 2, 7>;
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
-defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
+defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFLogicZ, [SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTest, [SKXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
-defm : SKXWriteResPair<WriteFTestY, [SKXPort0], 2, [1], 1, 7>; // Floating point TEST instructions (YMM/ZMM).
+defm : SKXWriteResPair<WriteFTestY, [SKXPort0], 2, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFTestZ, [SKXPort0], 2, [1], 1, 7>;
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
-defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
+defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
-defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector variable shuffles.
+defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
-defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>; // Floating point vector blends.
+defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>;
+defm : SKXWriteResPair<WriteFBlendZ,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
-defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>; // Fp vector variable blends.
+defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>;
+defm : SKXWriteResPair<WriteFVarBlendZ,[SKXPort015], 2, [2], 2, 7>;
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
@@ -279,47 +296,62 @@ defm : X86WriteRes<WriteVecMoveToGpr,
defm : X86WriteRes<WriteVecMoveFromGpr, [SKXPort5], 1, [1], 1>;
defm : SKXWriteResPair<WriteVecALU, [SKXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
-defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM).
-defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
+defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>;
+defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>;
+defm : SKXWriteResPair<WriteVecALUZ, [SKXPort0], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecLogic, [SKXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
-defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (XMM).
-defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
+defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>;
+defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>;
+defm : SKXWriteResPair<WriteVecLogicZ,[SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecTest, [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
-defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>; // Vector integer TEST instructions (YMM/ZMM).
+defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
+defm : SKXWriteResPair<WriteVecTestZ, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 4, [1], 1, 5>; // Vector integer multiply.
-defm : SKXWriteResPair<WriteVecIMulX, [SKXPort015], 4, [1], 1, 6>; // Vector integer multiply (XMM).
-defm : SKXWriteResPair<WriteVecIMulY, [SKXPort015], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
-defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector PMULLD.
-defm : SKXWriteResPair<WritePMULLDY, [SKXPort015], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
+defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 4, [1], 1, 6>;
+defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WritePMULLD, [SKXPort01], 10, [2], 2, 6>; // Vector PMULLD.
+defm : SKXWriteResPair<WritePMULLDY, [SKXPort01], 10, [2], 2, 7>;
+defm : SKXWriteResPair<WritePMULLDZ, [SKXPort05], 10, [2], 2, 7>;
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
-defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
-defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
+defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>;
+defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>;
+defm : SKXWriteResPair<WriteShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
-defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>; // Vector variable shuffles (XMM).
-defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector variable shuffles (YMM/ZMM).
+defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>;
+defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
+defm : SKXWriteResPair<WriteVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
-defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM).
+defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>;
+defm : SKXWriteResPair<WriteBlendZ,[SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
-defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
+defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>;
+defm : SKXWriteResPair<WriteVarBlendZ,[SKXPort05], 2, [1], 1, 6>;
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
-defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>; // Vector MPSAD.
+defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>;
+defm : SKXWriteResPair<WriteMPSADZ, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 5>; // Vector PSADBW.
-defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW.
-defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>; // Vector PSADBW.
-defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS.
+defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>;
+defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>;
+defm : SKXWriteResPair<WritePSADBWZ, [SKXPort5], 3, [1], 1, 7>;
+defm : SKXWriteResPair<WritePHMINPOS, [SKXPort0], 4, [1], 1, 6>; // Vector PHMINPOS.
// Vector integer shifts.
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1, [1], 1, 5>;
defm : X86WriteRes<WriteVecShiftX, [SKXPort5,SKXPort01], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftY, [SKXPort5,SKXPort01], 4, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftZ, [SKXPort5,SKXPort0], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [SKXPort01,SKXPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [SKXPort01,SKXPort23], 8, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftZLd, [SKXPort0,SKXPort23], 8, [1,1], 2>;
defm : SKXWriteResPair<WriteVecShiftImm, [SKXPort0], 1, [1], 1, 5>;
-defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts (XMM).
-defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>; // Vector integer immediate shifts (YMM/ZMM).
+defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts.
+defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>;
+defm : SKXWriteResPair<WriteVecShiftImmZ, [SKXPort0], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort01], 1, [1], 1, 6>; // Variable vector shifts.
-defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>; // Variable vector shifts (YMM/ZMM).
+defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>;
+defm : SKXWriteResPair<WriteVarVecShiftZ, [SKXPort0], 1, [1], 1, 7>;
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SKXPort5]> {
@@ -343,36 +375,46 @@ def : WriteRes<WriteVecExtractSt, [SKXPo
}
// Conversion between integer and float.
-defm : SKXWriteResPair<WriteCvtSS2I, [SKXPort0,SKXPort015], 6, [1,1], 2>;
-defm : SKXWriteResPair<WriteCvtPS2I, [SKXPort1], 3>;
-defm : SKXWriteResPair<WriteCvtPS2IY, [SKXPort1], 3>;
-defm : SKXWriteResPair<WriteCvtSD2I, [SKXPort0,SKXPort015], 6, [1,1], 2>;
-defm : SKXWriteResPair<WriteCvtPD2I, [SKXPort1], 3>;
-defm : SKXWriteResPair<WriteCvtPD2IY, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteCvtSS2I, [SKXPort01], 6, [2], 2>; // Needs more work: DD vs DQ.
+defm : SKXWriteResPair<WriteCvtPS2I, [SKXPort01], 3>;
+defm : SKXWriteResPair<WriteCvtPS2IY, [SKXPort01], 3>;
+defm : SKXWriteResPair<WriteCvtPS2IZ, [SKXPort05], 3>;
+defm : SKXWriteResPair<WriteCvtSD2I, [SKXPort01], 6, [2], 2>;
+defm : SKXWriteResPair<WriteCvtPD2I, [SKXPort01], 3>;
+defm : SKXWriteResPair<WriteCvtPD2IY, [SKXPort01], 3>;
+defm : SKXWriteResPair<WriteCvtPD2IZ, [SKXPort05], 3>;
defm : SKXWriteResPair<WriteCvtI2SS, [SKXPort1], 4>;
-defm : SKXWriteResPair<WriteCvtI2PS, [SKXPort1], 4>;
-defm : SKXWriteResPair<WriteCvtI2PSY, [SKXPort1], 4>;
+defm : SKXWriteResPair<WriteCvtI2PS, [SKXPort01], 4>;
+defm : SKXWriteResPair<WriteCvtI2PSY, [SKXPort01], 4>;
+defm : SKXWriteResPair<WriteCvtI2PSZ, [SKXPort05], 4>; // Needs more work: DD vs DQ.
defm : SKXWriteResPair<WriteCvtI2SD, [SKXPort1], 4>;
-defm : SKXWriteResPair<WriteCvtI2PD, [SKXPort0,SKXPort5], 5, [1,1], 2>;
-defm : SKXWriteResPair<WriteCvtI2PDY, [SKXPort1], 4>;
+defm : SKXWriteResPair<WriteCvtI2PD, [SKXPort01], 4>;
+defm : SKXWriteResPair<WriteCvtI2PDY, [SKXPort01], 4>;
+defm : SKXWriteResPair<WriteCvtI2PDZ, [SKXPort05], 4>;
defm : SKXWriteResPair<WriteCvtSS2SD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PD, [SKXPort1], 3>;
-defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort5,SKXPort01], 3, [1,1], 2>;
+defm : SKXWriteResPair<WriteCvtPS2PDZ, [SKXPort05], 3, [2], 2>;
defm : SKXWriteResPair<WriteCvtSD2SS, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPD2PS, [SKXPort1], 3>;
-defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort5,SKXPort01], 3, [1,1], 2>;
+defm : SKXWriteResPair<WriteCvtPD2PSZ, [SKXPort05], 3, [2], 2>;
-defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort015], 5, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort015], 7, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort015], 9, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort015], 10, [1,1], 2>;
-
-defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort015], 5, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort015], 7, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 6, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 8, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZ, [SKXPort5,SKXPort0], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort01], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort01], 10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZLd, [SKXPort23,SKXPort05], 10, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHZ, [SKXPort5,SKXPort05], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 6, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 8, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHZSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort05], 8, [1,1,1,1], 4>;
// Strings instructions.
@@ -589,15 +631,15 @@ def SKXWriteResGroup9 : SchedWriteRes<[S
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z|Z128|Z256)rr",
- "VBLENDMPS(Z|Z128|Z256)rr",
+def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
+ "VBLENDMPS(Z128|Z256)rr",
"VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
"(V?)PADD(B|D|Q|W)rr",
"VPBLENDD(Y?)rri",
- "VPBLENDMB(Z|Z128|Z256)rr",
- "VPBLENDMD(Z|Z128|Z256)rr",
- "VPBLENDMQ(Z|Z128|Z256)rr",
- "VPBLENDMW(Z|Z128|Z256)rr",
+ "VPBLENDMB(Z128|Z256)rr",
+ "VPBLENDMD(Z128|Z256)rr",
+ "VPBLENDMQ(Z128|Z256)rr",
+ "VPBLENDMW(Z128|Z256)rr",
"VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rr",
"(V?)PSUB(B|D|Q|W)rr",
"VPTERNLOGD(Z|Z128|Z256)rri",
@@ -611,8 +653,7 @@ def SKXWriteResGroup10 : SchedWriteRes<[
def: InstRW<[SKXWriteResGroup10], (instrs CBW, CWDE, CDQE,
CMC, STC)>;
def: InstRW<[SKXWriteResGroup10], (instrs LAHF, SAHF)>; // TODO: This doesn't match Agner's data
-def: InstRW<[SKXWriteResGroup10], (instregex "NOOP",
- "SGDT64m",
+def: InstRW<[SKXWriteResGroup10], (instregex "SGDT64m",
"SIDT64m",
"SMSW16m",
"STRm",
@@ -739,8 +780,7 @@ def SKXWriteResGroup30 : SchedWriteRes<[
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup30], (instregex "KADD(B|D|Q|W)rr",
- "KMOV(B|D|Q|W)rk",
+def: InstRW<[SKXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
"KORTEST(B|D|Q|W)rr",
"KTEST(B|D|Q|W)rr")>;
@@ -768,6 +808,7 @@ def SKXWriteResGroup32 : SchedWriteRes<[
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
+ "KADD(B|D|Q|W)rr",
"KSHIFTL(B|D|Q|W)ri",
"KSHIFTR(B|D|Q|W)ri",
"KUNPCKBWrr",
@@ -907,26 +948,44 @@ def SKXWriteResGroup49 : SchedWriteRes<[
}
def: InstRW<[SKXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
-def SKXWriteResGroup50 : SchedWriteRes<[SKXPort015]> {
+def SKXWriteResGroup50 : SchedWriteRes<[SKXPort01]> {
let Latency = 4;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PS(Y|Z|Z128|Z256)rr",
+def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PS(Y|Z128|Z256)rr",
"(V?)CVTDQ2PSrr",
- "VCVTPD2QQ(Z|Z128|Z256)rr",
- "VCVTPD2UQQ(Z|Z128|Z256)rr",
- "VCVTPS2DQ(Y|Z|Z128|Z256)rr",
+ "VCVTPD2QQ(Z128|Z256)rr",
+ "VCVTPD2UQQ(Z128|Z256)rr",
+ "VCVTPS2DQ(Y|Z128|Z256)rr",
"(V?)CVTPS2DQrr",
- "VCVTPS2UDQ(Z|Z128|Z256)rr",
- "VCVTQQ2PD(Z|Z128|Z256)rr",
- "VCVTTPD2QQ(Z|Z128|Z256)rr",
- "VCVTTPD2UQQ(Z|Z128|Z256)rr",
- "VCVTTPS2DQ(Y|Z|Z128|Z256)rr",
+ "VCVTPS2UDQ(Z128|Z256)rr",
+ "VCVTQQ2PD(Z128|Z256)rr",
+ "VCVTTPD2QQ(Z128|Z256)rr",
+ "VCVTTPD2UQQ(Z128|Z256)rr",
+ "VCVTTPS2DQ(Z128|Z256)rr",
"(V?)CVTTPS2DQrr",
- "VCVTTPS2UDQ(Z|Z128|Z256)rr",
- "VCVTUDQ2PS(Z|Z128|Z256)rr",
- "VCVTUQQ2PD(Z|Z128|Z256)rr")>;
+ "VCVTTPS2UDQ(Z128|Z256)rr",
+ "VCVTUDQ2PS(Z128|Z256)rr",
+ "VCVTUQQ2PD(Z128|Z256)rr")>;
+
+def SKXWriteResGroup50z : SchedWriteRes<[SKXPort05]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup50z], (instrs VCVTDQ2PSZrr,
+ VCVTPD2QQZrr,
+ VCVTPD2UQQZrr,
+ VCVTPS2DQZrr,
+ VCVTPS2UDQZrr,
+ VCVTQQ2PDZrr,
+ VCVTTPD2QQZrr,
+ VCVTTPD2UQQZrr,
+ VCVTTPS2DQZrr,
+ VCVTTPS2UDQZrr,
+ VCVTUDQ2PSZrr,
+ VCVTUQQ2PDZrr)>;
def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> {
let Latency = 4;
@@ -1010,13 +1069,6 @@ def: InstRW<[SKXWriteResGroup58], (instr
"MOVZX(16|32|64)rm8",
"(V?)MOVDDUPrm")>; // TODO: Should this be SKXWriteResGroup71?
-def SKXWriteResGroup59 : SchedWriteRes<[SKXPort015]> {
- let Latency = 5;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SKXWriteResGroup59], (instregex "VCVTSD2SSZrr")>;
-
def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 5;
let NumMicroOps = 2;
@@ -1035,7 +1087,7 @@ def: InstRW<[SKXWriteResGroup61], (instr
"VCVTPS2QQZ128rr",
"VCVTPS2UQQZ128rr",
"VCVTQQ2PSZ128rr",
- "(V?)CVTSD2SSrr",
+ "(V?)CVTSD2SS(Z?)rr",
"(V?)CVTSI(64)?2SDrr",
"VCVTSI2SSZrr",
"(V?)CVTSI2SSrr",
@@ -1136,7 +1188,7 @@ def: InstRW<[SKXWriteResGroup71], (instr
"VPBROADCASTDrm",
"VPBROADCASTQrm")>;
-def SKXWriteResGroup72 : SchedWriteRes<[SKXPort0]> {
+def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [2];
@@ -1286,7 +1338,7 @@ def: InstRW<[SKXWriteResGroup89], (instr
"VPBROADCASTDYrm",
"VPBROADCASTQYrm")>;
-def SKXWriteResGroup90 : SchedWriteRes<[SKXPort0,SKXPort5]> {
+def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
@@ -1318,21 +1370,40 @@ def SKXWriteResGroup93 : SchedWriteRes<[
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PD(Z|Z256)rr",
- "VCVTPD2DQ(Y|Z|Z256)rr",
- "VCVTPD2PS(Y|Z|Z256)rr",
- "VCVTPD2UDQ(Z|Z256)rr",
- "VCVTPS2PD(Y|Z|Z256)rr",
- "VCVTPS2QQ(Z|Z256)rr",
- "VCVTPS2UQQ(Z|Z256)rr",
- "VCVTQQ2PS(Z|Z256)rr",
- "VCVTTPD2DQ(Y|Z|Z256)rr",
- "VCVTTPD2UDQ(Z|Z256)rr",
- "VCVTTPS2QQ(Z|Z256)rr",
- "VCVTTPS2UQQ(Z|Z256)rr",
- "VCVTUDQ2PD(Z|Z256)rr",
- "VCVTUQQ2PS(Z|Z256)rr")>;
-
+def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
+ "VCVTPD2DQ(Y|Z256)rr",
+ "VCVTPD2PS(Y|Z256)rr",
+ "VCVTPD2UDQZ256rr",
+ "VCVTPS2PD(Y|Z256)rr",
+ "VCVTPS2QQZ256rr",
+ "VCVTPS2UQQZ256rr",
+ "VCVTQQ2PSZ256rr",
+ "VCVTTPD2DQ(Y|Z256)rr",
+ "VCVTTPD2UDQZ256rr",
+ "VCVTTPS2QQZ256rr",
+ "VCVTTPS2UQQZ256rr",
+ "VCVTUDQ2PDZ256rr",
+ "VCVTUQQ2PSZ256rr")>;
+
+def SKXWriteResGroup93z : SchedWriteRes<[SKXPort5,SKXPort05]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
+ VCVTPD2DQZrr,
+ VCVTPD2PSZrr,
+ VCVTPD2UDQZrr,
+ VCVTPS2PDZrr,
+ VCVTPS2QQZrr,
+ VCVTPS2UQQZrr,
+ VCVTQQ2PSZrr,
+ VCVTTPD2DQZrr,
+ VCVTTPD2UDQZrr,
+ VCVTTPS2QQZrr,
+ VCVTTPS2UQQZrr,
+ VCVTUDQ2PDZrr,
+ VCVTUQQ2PSZrr)>;
def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 7;
@@ -1405,8 +1476,8 @@ def SKXWriteResGroup100 : SchedWriteRes<
let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup100], (instregex "VCVTSS2USI64Zrr",
- "VCVTTSS2SI(64)?Zrr",
- "(V?)CVTTSS2SI(64)?rr",
+ "(V?)CVTSS2SI64(Z?)rr",
+ "(V?)CVTTSS2SI64(Z?)rr",
"VCVTTSS2USI64Zrr")>;
def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> {
@@ -1751,16 +1822,6 @@ def SKXWriteResGroup137 : SchedWriteRes<
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
"(V?)CVTPS2PDrm")>;
-def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SKXWriteResGroup138], (instregex "VRCP14PDZr(b?)",
- "VRCP14PSZr(b?)",
- "VRSQRT14PDZr(b?)",
- "VRSQRT14PSZr(b?)")>;
-
def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 3;
@@ -2009,12 +2070,19 @@ def SKXWriteResGroup171 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
-def SKXWriteResGroup174 : SchedWriteRes<[SKXPort015]> {
+def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> {
let Latency = 12;
let NumMicroOps = 3;
let ResourceCycles = [3];
}
-def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z|Z128|Z256)rr")>;
+def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
+
+def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def: InstRW<[SKXWriteResGroup174z], (instregex "VPMULLQZrr")>;
def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 12;
@@ -2143,16 +2211,6 @@ def SKXWriteResGroup195 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
-def SKXWriteResGroup198 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> {
- let Latency = 16;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKXWriteResGroup198], (instregex "VRCP14PDZm(b?)",
- "VRCP14PSZm(b?)",
- "VRSQRT14PDZm(b?)",
- "VRSQRT14PSZm(b?)")>;
-
def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 16;
let NumMicroOps = 14;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Mon Jun 11 07:37:53 2018
@@ -62,7 +62,6 @@ multiclass X86WriteResPairUnsupported<X8
}
}
-
// Multiclass that wraps X86FoldableSchedWrite for each vector width.
class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
X86FoldableSchedWrite s128,
@@ -177,23 +176,29 @@ def WriteFMoveY : SchedWrite;
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
-defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM/ZMM).
+defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM).
+defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM).
defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
-defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM/ZMM).
+defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM).
+defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM).
defm WriteFCmp : X86SchedWritePair; // Floating point compare.
defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
-defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM/ZMM).
+defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM).
+defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM).
defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
-defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM/ZMM).
+defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM).
+defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM).
defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
-defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM/ZMM).
+defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM).
+defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (ZMM).
defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
-defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM/ZMM).
+defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM).
+defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
defm WriteFDiv : X86SchedWritePair; // Floating point division.
defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
@@ -213,41 +218,54 @@ defm WriteFSqrt64Z : X86SchedWritePair;
defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
-defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM/ZMM).
+defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM).
+defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM).
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
-defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM/ZMM).
+defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM).
+defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM).
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
-defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM).
+defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM).
+defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM).
defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
+defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM).
defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
-defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM/ZMM).
+defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM).
+defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM).
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
-defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM).
+defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM).
+defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM).
defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions.
-defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM/ZMM).
+defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM).
+defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
-defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM/ZMM).
+defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM).
+defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM).
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
-defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM/ZMM).
+defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM).
+defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM).
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
-defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM/ZMM).
+defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM).
+defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM).
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
-defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM/ZMM).
+defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM).
+defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (ZMM).
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Horizontal Add/Sub (float and integer)
defm WriteFHAdd : X86SchedWritePair;
-defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM.
+defm WriteFHAddY : X86SchedWritePair;
+defm WriteFHAddZ : X86SchedWritePair;
defm WritePHAdd : X86SchedWritePair;
-defm WritePHAddX : X86SchedWritePair; // XMM.
-defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
+defm WritePHAddX : X86SchedWritePair;
+defm WritePHAddY : X86SchedWritePair;
+defm WritePHAddZ : X86SchedWritePair;
// Vector integer operations.
def WriteVecLoad : SchedWrite;
@@ -272,38 +290,51 @@ def WriteVecMoveFromGpr : SchedWrite;
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
-defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
+defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM).
+defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM).
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
-defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
+defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM).
+defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM).
defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
-defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM/ZMM).
+defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM).
+defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
-defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM/ZMM).
+defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM).
+defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM).
defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default).
defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
-defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM/ZMM).
+defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM).
+defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM).
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default).
defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM).
-defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM/ZMM).
+defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM).
+defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM).
defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
-defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM/ZMM).
+defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM).
+defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM).
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
-defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM/ZMM).
+defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM).
+defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM).
defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
-defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM/ZMM).
+defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM).
+defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM).
defm WriteBlend : X86SchedWritePair; // Vector blends.
-defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM/ZMM).
+defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM).
+defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM).
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
-defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM/ZMM).
+defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM).
+defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM).
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
-defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM/ZMM).
+defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM).
+defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM).
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
-defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM/ZMM).
+defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM).
+defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM).
defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
// Vector insert/extract operations.
@@ -320,35 +351,44 @@ def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer.
defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM).
-defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM/ZMM).
+defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM).
+defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM).
defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM).
-defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM/ZMM).
+defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM).
+defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM).
defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double.
defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM).
-defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM/ZMM).
+defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM).
+defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM).
defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float.
defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM).
-defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM/ZMM).
+defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM).
+defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM).
defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
-defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM/ZMM).
+defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM).
+defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM).
defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
-defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM/ZMM).
+defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM).
+defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM).
defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
-defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM/ZMM).
+defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM).
+defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM).
def WriteCvtPS2PH : SchedWrite; // // Float -> Half size conversion.
-def WriteCvtPS2PHY : SchedWrite; // // Float -> Half size conversion (YMM/ZMM).
+def WriteCvtPS2PHY : SchedWrite; // // Float -> Half size conversion (YMM).
+def WriteCvtPS2PHZ : SchedWrite; // // Float -> Half size conversion (ZMM).
def WriteCvtPS2PHSt : SchedWrite; // // Float -> Half + store size conversion.
-def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion (YMM/ZMM).
+def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion (YMM).
+def WriteCvtPS2PHZSt : SchedWrite; // // Float -> Half + store size conversion (ZMM).
// CRC32 instruction.
defm WriteCRC32 : X86SchedWritePair;
@@ -387,7 +427,8 @@ defm WriteFVarShuffle256 : X86SchedWrite
defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
-defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM/ZMM).
+defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM).
+defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM).
// Old microcoded instructions that nobody use.
def WriteMicrocoded : SchedWrite;
@@ -441,25 +482,25 @@ def SchedWriteVecMoveLSNT
// Vector width wrappers.
def SchedWriteFAdd
- : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddY>;
+ : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
def SchedWriteFAdd64
- : X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Y>;
+ : X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>;
def SchedWriteFHAdd
- : X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddY>;
+ : X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>;
def SchedWriteFCmp
- : X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpY>;
+ : X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>;
def SchedWriteFCmp64
- : X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Y>;
+ : X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>;
def SchedWriteFMul
- : X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulY>;
+ : X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>;
def SchedWriteFMul64
- : X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Y>;
+ : X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>;
def SchedWriteFMA
- : X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAY>;
+ : X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>;
def SchedWriteDPPD
: X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>;
def SchedWriteDPPS
- : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSY>;
+ : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>;
def SchedWriteFDiv
: X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
def SchedWriteFDiv64
@@ -471,90 +512,90 @@ def SchedWriteFSqrt64
: X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
WriteFSqrt64Y, WriteFSqrt64Z>;
def SchedWriteFRcp
- : X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpY>;
+ : X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpZ>;
def SchedWriteFRsqrt
- : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtY>;
+ : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>;
def SchedWriteFRnd
- : X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndY>;
+ : X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>;
def SchedWriteFLogic
- : X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicY>;
+ : X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>;
def SchedWriteFTest
- : X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestY>;
+ : X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>;
def SchedWriteFShuffle
: X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
- WriteFShuffleY, WriteFShuffleY>;
+ WriteFShuffleY, WriteFShuffleZ>;
def SchedWriteFVarShuffle
: X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
- WriteFVarShuffleY, WriteFVarShuffleY>;
+ WriteFVarShuffleY, WriteFVarShuffleZ>;
def SchedWriteFBlend
- : X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendY>;
+ : X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>;
def SchedWriteFVarBlend
: X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
- WriteFVarBlendY, WriteFVarBlendY>;
+ WriteFVarBlendY, WriteFVarBlendZ>;
def SchedWriteCvtDQ2PD
: X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD,
- WriteCvtI2PDY, WriteCvtI2PDY>;
+ WriteCvtI2PDY, WriteCvtI2PDZ>;
def SchedWriteCvtDQ2PS
: X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS,
- WriteCvtI2PSY, WriteCvtI2PSY>;
+ WriteCvtI2PSY, WriteCvtI2PSZ>;
def SchedWriteCvtPD2DQ
: X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I,
- WriteCvtPD2IY, WriteCvtPD2IY>;
+ WriteCvtPD2IY, WriteCvtPD2IZ>;
def SchedWriteCvtPS2DQ
: X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I,
- WriteCvtPS2IY, WriteCvtPS2IY>;
+ WriteCvtPS2IY, WriteCvtPS2IZ>;
def SchedWriteCvtPS2PD
: X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
- WriteCvtPS2PDY, WriteCvtPS2PDY>;
+ WriteCvtPS2PDY, WriteCvtPS2PDZ>;
def SchedWriteCvtPD2PS
: X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
- WriteCvtPD2PSY, WriteCvtPD2PSY>;
+ WriteCvtPD2PSY, WriteCvtPD2PSZ>;
def SchedWriteVecALU
- : X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUY>;
+ : X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>;
def SchedWritePHAdd
- : X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddY>;
+ : X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>;
def SchedWriteVecLogic
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
- WriteVecLogicY, WriteVecLogicY>;
+ WriteVecLogicY, WriteVecLogicZ>;
def SchedWriteVecTest
: X86SchedWriteWidths<WriteVecTest, WriteVecTest,
- WriteVecTestY, WriteVecTestY>;
+ WriteVecTestY, WriteVecTestZ>;
def SchedWriteVecShift
: X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
- WriteVecShiftY, WriteVecShiftY>;
+ WriteVecShiftY, WriteVecShiftZ>;
def SchedWriteVecShiftImm
: X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX,
- WriteVecShiftImmY, WriteVecShiftImmY>;
+ WriteVecShiftImmY, WriteVecShiftImmZ>;
def SchedWriteVarVecShift
: X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
- WriteVarVecShiftY, WriteVarVecShiftY>;
+ WriteVarVecShiftY, WriteVarVecShiftZ>;
def SchedWriteVecIMul
: X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX,
- WriteVecIMulY, WriteVecIMulY>;
+ WriteVecIMulY, WriteVecIMulZ>;
def SchedWritePMULLD
: X86SchedWriteWidths<WritePMULLD, WritePMULLD,
- WritePMULLDY, WritePMULLDY>;
+ WritePMULLDY, WritePMULLDZ>;
def SchedWriteMPSAD
: X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
- WriteMPSADY, WriteMPSADY>;
+ WriteMPSADY, WriteMPSADZ>;
def SchedWritePSADBW
: X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
- WritePSADBWY, WritePSADBWY>;
+ WritePSADBWY, WritePSADBWZ>;
def SchedWriteShuffle
: X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
- WriteShuffleY, WriteShuffleY>;
+ WriteShuffleY, WriteShuffleZ>;
def SchedWriteVarShuffle
: X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
- WriteVarShuffleY, WriteVarShuffleY>;
+ WriteVarShuffleY, WriteVarShuffleZ>;
def SchedWriteBlend
- : X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendY>;
+ : X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>;
def SchedWriteVarBlend
: X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
- WriteVarBlendY, WriteVarBlendY>;
+ WriteVarBlendY, WriteVarBlendZ>;
// Vector size wrappers.
def SchedWriteFAddSizes
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Mon Jun 11 07:37:53 2018
@@ -210,28 +210,36 @@ defm : X86WriteRes<WriteEMMS, [Ato
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : AtomWriteResPair<WriteFAdd64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAdd64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteFAdd64Y, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmpX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmpY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : AtomWriteResPair<WriteFCmp64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteFCmp64Y, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMulY, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
+defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul64X, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFMul64Y, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
+defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFRcpY, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFRsqrtY, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : AtomWriteResPair<WriteFDivY, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
@@ -252,24 +260,33 @@ defm : AtomWriteResPair<WriteFSqrt80,
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFRndY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFLogicY>;
+defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : AtomWriteResPair<WriteFTest, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFTestY>;
+defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFShuffleY>;
+defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleY>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
+defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteDPPD>;
defm : X86WriteResPairUnsupported<WriteDPPS>;
defm : X86WriteResPairUnsupported<WriteDPPSY>;
+defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : X86WriteResPairUnsupported<WriteFBlend>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
+defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : X86WriteResPairUnsupported<WriteFVarBlend>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
+defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
@@ -280,30 +297,39 @@ defm : X86WriteResPairUnsupported<WriteF
defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPS2IY, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtPD2IY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PSY, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtI2PDY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtPS2PDY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtPD2PSY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
+defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
@@ -334,42 +360,56 @@ defm : X86WriteRes<WriteVecMoveFromGpr,
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecALUX, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecALUY, [AtomPort01], [AtomPort0], 1, 1>;
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicX, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>;
+defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : AtomWriteResPair<WriteVecTest, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecTestY, [AtomPort01], [AtomPort0], 1, 1>;
+defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecShiftX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecShiftY, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
+defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : AtomWriteResPair<WriteVecShiftImm, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : AtomWriteResPair<WriteVecShiftImmX, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : AtomWriteResPair<WriteVecShiftImmY, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteVecIMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteVecIMulY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : AtomWriteResPair<WritePMULLD, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WritePMULLDY, [AtomPort01], [AtomPort0], 1, 1>;
+defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : AtomWriteResPair<WritePHMINPOS, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteMPSAD, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteMPSADY, [AtomPort01], [AtomPort0], 1, 1>;
+defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : AtomWriteResPair<WritePSADBW, [AtomPort01], [AtomPort01], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WritePSADBWX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WritePSADBWY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleY, [AtomPort0], [AtomPort0], 1, 1>;
+defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
defm : AtomWriteResPair<WriteVarShuffleY, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteBlend>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
+defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : X86WriteResPairUnsupported<WriteVarBlend>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
+defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Mon Jun 11 07:37:53 2018
@@ -298,34 +298,44 @@ defm : X86WriteRes<WriteEMMS, [
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAddX, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : JWriteResFpuPair<WriteFAdd64, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAdd64X, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAdd64Y, [JFPU0, JFPA], 3, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmpX, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmpY, [JFPU0, JFPA], 2, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : JWriteResFpuPair<WriteFCmp64, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmp64X, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmp64Y, [JFPU0, JFPA], 2, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMulX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : JWriteResFpuPair<WriteFMul64, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResFpuPair<WriteFMul64X, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResYMMPair<WriteFMul64Y, [JFPU1, JFPM], 4, [2,4], 2>;
+defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
+defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : JWriteResFpuPair<WriteDPPD, [JFPU1, JFPM, JFPA], 9, [1, 3, 3], 3>;
defm : JWriteResFpuPair<WriteDPPS, [JFPU1, JFPM, JFPA], 11, [1, 3, 3], 5>;
defm : JWriteResYMMPair<WriteDPPSY, [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>;
+defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRcpX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRcpY, [JFPU1, JFPM], 2, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRsqrtX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDivX, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>;
@@ -346,18 +356,25 @@ defm : JWriteResFpuPair<WriteFSqrt80,
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRnd, [JFPU1, JSTC], 3>;
defm : JWriteResYMMPair<WriteFRndY, [JFPU1, JSTC], 3, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
+defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : JWriteResFpuPair<WriteFTest, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteFTestY , [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
+defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFShuffleY, [JFPU01, JFPX], 1, [2, 2], 2>;
+defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
+defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
+defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
@@ -368,33 +385,42 @@ defm : X86WriteResPairUnsupported<WriteF
defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPS2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2IY, [JFPU1, JSTC], 3, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPD2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
// FIXME: f+3 ST, LD+STC latency
defm : JWriteResFpuPair<WriteCvtI2SS, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : JWriteResFpuPair<WriteCvtI2SD, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PD, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PDY, [JFPU1, JSTC], 3, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : JWriteResFpuPair<WriteCvtSS2SD, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPS2PD, [JFPU1, JSTC], 2, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2PDY, [JFPU1, JSTC], 2, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : JWriteResFpuPair<WriteCvtSD2SS, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPD2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2PSY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
+defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPS2PH, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY, [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
@@ -425,42 +451,56 @@ defm : X86WriteRes<WriteVecMoveFromGpr,
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUX, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUY, [JFPU01, JVALU], 1>;
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftY, [JFPU01, JVALU], 1>;
+defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : JWriteResFpuPair<WriteVecShiftImm, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftImmY,[JFPU01, JVALU], 1>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulX, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulY, [JFPU0, JVIMUL], 2>;
+defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : JWriteResFpuPair<WritePMULLDY, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
+defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : JWriteResFpuPair<WriteMPSADY, [JFPU0, JVIMUL], 3, [1, 2]>;
+defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWY, [JFPU01, JVALU], 2>;
+defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleY, [JFPU01, JVALU], 1>;
+defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarShuffleY, [JFPU01, JVALU], 2, [1, 4], 3>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteBlendY, [JFPU01, JVALU], 1>;
+defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarBlendY, [JFPU01, JVALU], 2, [1, 4], 3>;
+defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecLogicX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
+defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : JWriteResFpuPair<WriteVecTest, [JFPU0, JFPA, JALU0], 3>;
-defm : JWriteResYMMPair<WriteVecTestY , [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
+defm : JWriteResYMMPair<WriteVecTestY, [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
+defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : JWriteResFpuPair<WriteShuffle256, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
@@ -555,7 +595,7 @@ def JWriteZeroLatency : SchedWriteRes<[]
let Latency = 0;
}
-// Certain instructions that use the same register for both source
+// Certain instructions that use the same register for both source
// operands do not have a real dependency on the previous contents of the
// register, and thus, do not have to wait before completing. They can be
// optimized out at register renaming stage.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Mon Jun 11 07:37:53 2018
@@ -159,22 +159,28 @@ defm : X86WriteRes<WriteEMMS, [SLM
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
+defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>;
+defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>;
+defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : SLMWriteResPair<WriteFCmp64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64Y, [SLM_FPC_RSV1], 3>;
+defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
+defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
+defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
@@ -186,9 +192,11 @@ defm : X86WriteResPairUnsupported<WriteF
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
@@ -201,40 +209,52 @@ defm : SLMWriteResPair<WriteFSqrt80, [S
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
+defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
+defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
+defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : SLMWriteResPair<WriteFTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFTestY, [SLM_FPC_RSV01], 1>;
+defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFShuffleY, [SLM_FPC_RSV0], 1>;
+defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
// Conversion between integer and float.
defm : SLMWriteResPair<WriteCvtSS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2IY, [SLM_FPC_RSV01], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : SLMWriteResPair<WriteCvtSD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2IY, [SLM_FPC_RSV01], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : SLMWriteResPair<WriteCvtI2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PSY, [SLM_FPC_RSV01], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : SLMWriteResPair<WriteCvtI2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PDY, [SLM_FPC_RSV01], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
@@ -260,37 +280,49 @@ def : WriteRes<WriteVecMoveFromGpr, [S
defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftY, [SLM_FPC_RSV0], 1>;
+defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : SLMWriteResPair<WriteVecShiftImm, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0], 1>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicX,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
+defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : SLMWriteResPair<WriteVecTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecTestY, [SLM_FPC_RSV01], 1>;
+defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
+defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
+defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
+defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 1>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
+defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
+defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
+defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;
// Vector insert/extract operations.
@@ -309,9 +341,11 @@ def : WriteRes<WriteVecExtractSt, [SLM_
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
+defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
+defm : X86WriteResPairUnsupported<WritePHAddZ>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
@@ -407,25 +441,33 @@ def : WriteRes<WriteNop, []>;
// scheduling resources anyway.
def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
+defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
+defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
+defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
+defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
+defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
} // SchedModel
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Mon Jun 11 07:37:53 2018
@@ -212,34 +212,45 @@ defm : X86WriteRes<WriteFMoveY, [
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU0], 3>;
+defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : ZnWriteResFpuPair<WriteFAdd64, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU0], 3>;
+defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmpX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmpY, [ZnFPU0], 3>;
+defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : ZnWriteResFpuPair<WriteFCmp64, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU0], 3>;
+defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>;
+defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1>;
+defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;
defm : ZnWriteResFpuPair<WriteVarBlendY, [ZnFPU0], 1>;
+defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : ZnWriteResFpuPair<WriteCvtSS2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPS2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPS2IY, [ZnFPU3], 5>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : ZnWriteResFpuPair<WriteCvtSD2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPD2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPD2IY, [ZnFPU3], 5>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : ZnWriteResFpuPair<WriteCvtI2SS, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PS, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PSY, [ZnFPU3], 5>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : ZnWriteResFpuPair<WriteCvtI2SD, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PD, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PDY, [ZnFPU3], 5>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>;
//defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>;
@@ -251,29 +262,39 @@ defm : X86WriteResPairUnsupported<WriteF
defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
+defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
+defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : ZnWriteResFpuPair<WriteFTest, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFTestY, [ZnFPU], 1>;
+defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>;
+defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMulX, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMulY, [ZnFPU01], 4, [1], 1, 7, 1>;
+defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : ZnWriteResFpuPair<WriteFMul64, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMul64X, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMul64Y, [ZnFPU01], 4, [1], 1, 7, 1>;
+defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFMAX, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>;
+defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRcpX, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [1], 1, 7, 2>;
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
//defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU02], 5>;
defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5, [1], 1, 7, 1>;
//defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20]>;
defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 20, [20]>;
defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 28, [28], 1, 7, 1>;
@@ -309,40 +330,52 @@ defm : X86WriteRes<WriteEMMS,
defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU2], 2>;
+defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : ZnWriteResFpuPair<WriteVecShiftImm, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
+defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : ZnWriteResFpuPair<WriteVecTest, [ZnFPU12], 1, [2], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 1, [2], 1, 7, 1>;
+defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALUX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALUY, [ZnFPU], 1>;
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulX, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4>;
+defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4, [1], 1, 7, 1>; // FIXME
defm : ZnWriteResFpuPair<WritePMULLDY, [ZnFPU0], 5, [2], 1, 7, 1>; // FIXME
+defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteShuffleX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteShuffleY, [ZnFPU], 1>;
+defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVarShuffleX,[ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU], 1>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1>;
+defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>;
+defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>;
// Vector Shift Operations
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU12], 1>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
// Vector insert/extract operations.
defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;
@@ -1144,9 +1177,10 @@ def ZnWriteCVTPD2PSYr: SchedWriteRes<[Zn
// CVTPD2PS.
// x,x.
def : SchedAlias<WriteCvtPD2PS, ZnWriteCVTPD2PSr>;
-
// y,y.
def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>;
+// z,z.
+defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;
def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> {
let Latency = 11;
@@ -1161,6 +1195,8 @@ def ZnWriteCVTPD2PSYLd : SchedWriteRes<[
let Latency = 11;
}
def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
+// z,m512
+defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>;
// CVTSD2SS.
// x,x.
@@ -1185,12 +1221,14 @@ def ZnWriteCVTPS2PDLd : SchedWriteRes<[Z
}
def : SchedAlias<WriteCvtPS2PDLd, ZnWriteCVTPS2PDLd>;
def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>;
+defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>;
// y,x.
def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> {
let Latency = 3;
}
def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>;
+defm : X86WriteResUnsupported<WriteCvtPS2PDZ>;
// CVTSS2SD.
// x,x.
@@ -1288,17 +1326,21 @@ def : InstRW<[ZnWriteCVSTSI2SILd], (inst
// x,v,i.
def : SchedAlias<WriteCvtPS2PH, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPS2PHY, ZnWriteMicrocoded>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
// m,v,i.
def : SchedAlias<WriteCvtPS2PHSt, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
// VCVTPH2PS.
// v,x.
def : SchedAlias<WriteCvtPH2PS, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPH2PSY, ZnWriteMicrocoded>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
// v,m.
def : SchedAlias<WriteCvtPH2PSLd, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
//-- SSE4A instructions --//
// EXTRQ
Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Mon Jun 11 07:37:53 2018
@@ -42,7 +42,7 @@ define <4 x double> @test_addpd(<4 x dou
;
; SKX-LABEL: test_addpd:
; SKX: # %bb.0:
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -96,7 +96,7 @@ define <8 x float> @test_addps(<8 x floa
;
; SKX-LABEL: test_addps:
; SKX: # %bb.0:
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -150,7 +150,7 @@ define <4 x double> @test_addsubpd(<4 x
;
; SKX-LABEL: test_addsubpd:
; SKX: # %bb.0:
-; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -205,7 +205,7 @@ define <8 x float> @test_addsubps(<8 x f
;
; SKX-LABEL: test_addsubps:
; SKX: # %bb.0:
-; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -267,7 +267,7 @@ define <4 x double> @test_andnotpd(<4 x
; SKX: # %bb.0:
; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_andnotpd:
@@ -336,7 +336,7 @@ define <8 x float> @test_andnotps(<8 x f
; SKX: # %bb.0:
; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_andnotps:
@@ -405,7 +405,7 @@ define <4 x double> @test_andpd(<4 x dou
; SKX: # %bb.0:
; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_andpd:
@@ -472,7 +472,7 @@ define <8 x float> @test_andps(<8 x floa
; SKX: # %bb.0:
; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_andps:
@@ -538,7 +538,7 @@ define <4 x double> @test_blendpd(<4 x d
; SKX-LABEL: test_blendpd:
; SKX: # %bb.0:
; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
-; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -602,7 +602,7 @@ define <8 x float> @test_blendps(<8 x fl
; SKX: # %bb.0:
; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
; SKX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_blendps:
@@ -956,7 +956,7 @@ define <4 x double> @test_cmppd(<4 x dou
;
; SKX-LABEL: test_cmppd:
; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
+; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -1022,7 +1022,7 @@ define <8 x float> @test_cmpps(<8 x floa
;
; SKX-LABEL: test_cmpps:
; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
+; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -1090,7 +1090,7 @@ define <4 x double> @test_cvtdq2pd(<4 x
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtdq2pd:
@@ -1153,9 +1153,9 @@ define <8 x float> @test_cvtdq2ps(<8 x i
;
; SKX-LABEL: test_cvtdq2ps:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtdq2ps:
@@ -1217,7 +1217,7 @@ define <8 x i32> @test_cvtpd2dq(<4 x dou
; SKX-LABEL: test_cvtpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:0.50]
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -1281,7 +1281,7 @@ define <8 x i32> @test_cvttpd2dq(<4 x do
; SKX-LABEL: test_cvttpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:0.50]
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -1406,7 +1406,7 @@ define <8 x i32> @test_cvtps2dq(<8 x flo
;
; SKX-LABEL: test_cvtps2dq:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50]
; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -1470,7 +1470,7 @@ define <8 x i32> @test_cvttps2dq(<8 x fl
;
; SKX-LABEL: test_cvttps2dq:
; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50]
; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -1979,7 +1979,7 @@ define <8 x float> @test_insertf128(<8 x
; SKX: # %bb.0:
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_insertf128:
@@ -2334,7 +2334,7 @@ define <4 x double> @test_maxpd(<4 x dou
;
; SKX-LABEL: test_maxpd:
; SKX: # %bb.0:
-; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2389,7 +2389,7 @@ define <8 x float> @test_maxps(<8 x floa
;
; SKX-LABEL: test_maxps:
; SKX: # %bb.0:
-; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2444,7 +2444,7 @@ define <4 x double> @test_minpd(<4 x dou
;
; SKX-LABEL: test_minpd:
; SKX: # %bb.0:
-; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2499,7 +2499,7 @@ define <8 x float> @test_minps(<8 x floa
;
; SKX-LABEL: test_minps:
; SKX: # %bb.0:
-; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2560,7 +2560,7 @@ define <4 x double> @test_movapd(<4 x do
; SKX-LABEL: test_movapd:
; SKX: # %bb.0:
; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2622,7 +2622,7 @@ define <8 x float> @test_movaps(<8 x flo
; SKX-LABEL: test_movaps:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2685,7 +2685,7 @@ define <4 x double> @test_movddup(<4 x d
; SKX: # %bb.0:
; SKX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_movddup:
@@ -2912,7 +2912,7 @@ define <4 x double> @test_movntpd(<4 x d
;
; SKX-LABEL: test_movntpd:
; SKX: # %bb.0:
-; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2965,7 +2965,7 @@ define <8 x float> @test_movntps(<8 x fl
;
; SKX-LABEL: test_movntps:
; SKX: # %bb.0:
-; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3025,7 +3025,7 @@ define <8 x float> @test_movshdup(<8 x f
; SKX: # %bb.0:
; SKX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_movshdup:
@@ -3088,7 +3088,7 @@ define <8 x float> @test_movsldup(<8 x f
; SKX: # %bb.0:
; SKX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_movsldup:
@@ -3152,7 +3152,7 @@ define <4 x double> @test_movupd(<4 x do
; SKX-LABEL: test_movupd:
; SKX: # %bb.0:
; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3216,7 +3216,7 @@ define <8 x float> @test_movups(<8 x flo
; SKX-LABEL: test_movups:
; SKX: # %bb.0:
; SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3272,7 +3272,7 @@ define <4 x double> @test_mulpd(<4 x dou
;
; SKX-LABEL: test_mulpd:
; SKX: # %bb.0:
-; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3326,7 +3326,7 @@ define <8 x float> @test_mulps(<8 x floa
;
; SKX-LABEL: test_mulps:
; SKX: # %bb.0:
-; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3387,7 +3387,7 @@ define <4 x double> @orpd(<4 x double> %
; SKX: # %bb.0:
; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: orpd:
@@ -3454,7 +3454,7 @@ define <8 x float> @test_orps(<8 x float
; SKX: # %bb.0:
; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_orps:
@@ -3521,7 +3521,7 @@ define <4 x double> @test_perm2f128(<4 x
; SKX: # %bb.0:
; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_perm2f128:
@@ -3584,7 +3584,7 @@ define <2 x double> @test_permilpd(<2 x
; SKX: # %bb.0:
; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_permilpd:
@@ -3647,7 +3647,7 @@ define <4 x double> @test_permilpd_ymm(<
; SKX: # %bb.0:
; SKX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_permilpd_ymm:
@@ -3710,7 +3710,7 @@ define <4 x float> @test_permilps(<4 x f
; SKX: # %bb.0:
; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
; SKX-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_permilps:
@@ -3773,7 +3773,7 @@ define <8 x float> @test_permilps_ymm(<8
; SKX: # %bb.0:
; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_permilps_ymm:
@@ -4056,7 +4056,7 @@ define <8 x float> @test_rcpps(<8 x floa
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00]
; SKX-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_rcpps:
@@ -4118,9 +4118,9 @@ define <4 x double> @test_roundpd(<4 x d
;
; SKX-LABEL: test_roundpd:
; SKX: # %bb.0:
-; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:0.67]
-; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:0.67]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00]
+; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_roundpd:
@@ -4182,9 +4182,9 @@ define <8 x float> @test_roundps(<8 x fl
;
; SKX-LABEL: test_roundps:
; SKX: # %bb.0:
-; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:0.67]
-; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:0.67]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:1.00]
+; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:1.00]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_roundps:
@@ -4248,7 +4248,7 @@ define <8 x float> @test_rsqrtps(<8 x fl
; SKX: # %bb.0:
; SKX-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00]
; SKX-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_rsqrtps:
@@ -4312,7 +4312,7 @@ define <4 x double> @test_shufpd(<4 x do
; SKX: # %bb.0:
; SKX-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_shufpd:
@@ -4375,7 +4375,7 @@ define <8 x float> @test_shufps(<8 x flo
; SKX: # %bb.0:
; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
; SKX-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_shufps:
@@ -4438,7 +4438,7 @@ define <4 x double> @test_sqrtpd(<4 x do
; SKX: # %bb.0:
; SKX-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:12.00]
; SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:12.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_sqrtpd:
@@ -4502,7 +4502,7 @@ define <8 x float> @test_sqrtps(<8 x flo
; SKX: # %bb.0:
; SKX-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:6.00]
; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:6.00]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_sqrtps:
@@ -4559,7 +4559,7 @@ define <4 x double> @test_subpd(<4 x dou
;
; SKX-LABEL: test_subpd:
; SKX: # %bb.0:
-; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -4613,7 +4613,7 @@ define <8 x float> @test_subps(<8 x floa
;
; SKX-LABEL: test_subps:
; SKX: # %bb.0:
-; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -5008,7 +5008,7 @@ define <4 x double> @test_unpckhpd(<4 x
; SKX: # %bb.0:
; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_unpckhpd:
@@ -5125,7 +5125,7 @@ define <4 x double> @test_unpcklpd(<4 x
; SKX: # %bb.0:
; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_unpcklpd:
@@ -5242,7 +5242,7 @@ define <4 x double> @test_xorpd(<4 x dou
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_xorpd:
@@ -5309,7 +5309,7 @@ define <8 x float> @test_xorps(<8 x floa
; SKX: # %bb.0:
; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_xorps:
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Mon Jun 11 07:37:53 2018
@@ -76,7 +76,7 @@ define <4 x double> @test_broadcastsd_ym
; SKX-LABEL: test_broadcastsd_ymm:
; SKX: # %bb.0:
; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_broadcastsd_ymm:
@@ -117,7 +117,7 @@ define <4 x float> @test_broadcastss(<4
; SKX-LABEL: test_broadcastss:
; SKX: # %bb.0:
; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_broadcastss:
@@ -158,7 +158,7 @@ define <8 x float> @test_broadcastss_ymm
; SKX-LABEL: test_broadcastss_ymm:
; SKX: # %bb.0:
; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_broadcastss_ymm:
@@ -2634,7 +2634,7 @@ define <4 x double> @test_permpd(<4 x do
; SKX: # %bb.0:
; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_permpd:
@@ -2683,7 +2683,7 @@ define <8 x float> @test_permps(<8 x i32
; SKX: # %bb.0:
; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_permps:
@@ -3320,7 +3320,7 @@ define <16 x i16> @test_pmaddubsw(<32 x
;
; SKX-LABEL: test_pmaddubsw:
; SKX: # %bb.0:
-; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3364,7 +3364,7 @@ define <8 x i32> @test_pmaddwd(<16 x i16
;
; SKX-LABEL: test_pmaddwd:
; SKX: # %bb.0:
-; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -4761,7 +4761,7 @@ define <4 x i64> @test_pmuldq(<8 x i32>
;
; SKX-LABEL: test_pmuldq:
; SKX: # %bb.0:
-; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -4805,7 +4805,7 @@ define <16 x i16> @test_pmulhrsw(<16 x i
;
; SKX-LABEL: test_pmulhrsw:
; SKX: # %bb.0:
-; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -4848,7 +4848,7 @@ define <16 x i16> @test_pmulhuw(<16 x i1
;
; SKX-LABEL: test_pmulhuw:
; SKX: # %bb.0:
-; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -4891,7 +4891,7 @@ define <16 x i16> @test_pmulhw(<16 x i16
;
; SKX-LABEL: test_pmulhw:
; SKX: # %bb.0:
-; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -4934,8 +4934,8 @@ define <8 x i32> @test_pmulld(<8 x i32>
;
; SKX-LABEL: test_pmulld:
; SKX: # %bb.0:
-; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:0.67]
-; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:0.67]
+; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
+; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pmulld:
@@ -4976,7 +4976,7 @@ define <16 x i16> @test_pmullw(<16 x i16
;
; SKX-LABEL: test_pmullw:
; SKX: # %bb.0:
-; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -5018,7 +5018,7 @@ define <4 x i64> @test_pmuludq(<8 x i32>
;
; SKX-LABEL: test_pmuludq:
; SKX: # %bb.0:
-; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Mon Jun 11 07:37:53 2018
@@ -12,7 +12,7 @@ define <8 x double> @addpd512(<8 x doubl
;
; SKX-LABEL: addpd512:
; SKX: # %bb.0: # %entry
-; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%add.i = fadd <8 x double> %x, %y
@@ -42,7 +42,7 @@ define <16 x float> @addps512(<16 x floa
;
; SKX-LABEL: addps512:
; SKX: # %bb.0: # %entry
-; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%add.i = fadd <16 x float> %x, %y
@@ -72,7 +72,7 @@ define <8 x double> @subpd512(<8 x doubl
;
; SKX-LABEL: subpd512:
; SKX: # %bb.0: # %entry
-; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%sub.i = fsub <8 x double> %x, %y
@@ -103,7 +103,7 @@ define <16 x float> @subps512(<16 x floa
;
; SKX-LABEL: subps512:
; SKX: # %bb.0: # %entry
-; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%sub.i = fsub <16 x float> %x, %y
@@ -134,7 +134,7 @@ define <8 x i64> @imulq512(<8 x i64> %y,
;
; SKX-LABEL: imulq512:
; SKX: # %bb.0:
-; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.00]
+; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.50]
; SKX-NEXT: retq # sched: [7:1.00]
%z = mul <8 x i64>%x, %y
ret <8 x i64>%z
@@ -148,7 +148,7 @@ define <4 x i64> @imulq256(<4 x i64> %y,
;
; SKX-LABEL: imulq256:
; SKX: # %bb.0:
-; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.00]
+; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.50]
; SKX-NEXT: retq # sched: [7:1.00]
%z = mul <4 x i64>%x, %y
ret <4 x i64>%z
@@ -162,7 +162,7 @@ define <2 x i64> @imulq128(<2 x i64> %y,
;
; SKX-LABEL: imulq128:
; SKX: # %bb.0:
-; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
+; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.50]
; SKX-NEXT: retq # sched: [7:1.00]
%z = mul <2 x i64>%x, %y
ret <2 x i64>%z
@@ -176,7 +176,7 @@ define <8 x double> @mulpd512(<8 x doubl
;
; SKX-LABEL: mulpd512:
; SKX: # %bb.0: # %entry
-; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%mul.i = fmul <8 x double> %x, %y
@@ -206,7 +206,7 @@ define <16 x float> @mulps512(<16 x floa
;
; SKX-LABEL: mulps512:
; SKX: # %bb.0: # %entry
-; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%mul.i = fmul <16 x float> %x, %y
@@ -543,7 +543,7 @@ define <16 x i32> @vpmulld_test(<16 x i3
;
; SKX-LABEL: vpmulld_test:
; SKX: # %bb.0:
-; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:0.67]
+; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%x = mul <16 x i32> %i, %j
ret <16 x i32> %x
@@ -712,7 +712,7 @@ define <16 x float> @test_mask_vaddps(<1
; SKX-LABEL: test_mask_vaddps:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
+; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
@@ -732,7 +732,7 @@ define <16 x float> @test_mask_vmulps(<1
; SKX-LABEL: test_mask_vmulps:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
+; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = fmul <16 x float> %i, %j
@@ -750,7 +750,7 @@ define <16 x float> @test_mask_vminps(<1
; SKX-LABEL: test_mask_vminps:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
+; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%cmp_res = fcmp olt <16 x float> %i, %j
@@ -769,7 +769,7 @@ define <8 x double> @test_mask_vminpd(<8
; SKX-LABEL: test_mask_vminpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
+; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%cmp_res = fcmp olt <8 x double> %i, %j
@@ -788,7 +788,7 @@ define <16 x float> @test_mask_vmaxps(<1
; SKX-LABEL: test_mask_vmaxps:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
+; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%cmp_res = fcmp ogt <16 x float> %i, %j
@@ -807,7 +807,7 @@ define <8 x double> @test_mask_vmaxpd(<8
; SKX-LABEL: test_mask_vmaxpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
+; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%cmp_res = fcmp ogt <8 x double> %i, %j
@@ -826,7 +826,7 @@ define <16 x float> @test_mask_vsubps(<1
; SKX-LABEL: test_mask_vsubps:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
+; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = fsub <16 x float> %i, %j
@@ -862,7 +862,7 @@ define <8 x double> @test_mask_vaddpd(<8
; SKX-LABEL: test_mask_vaddpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
+; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%x = fadd <8 x double> %i, %j
@@ -880,7 +880,7 @@ define <8 x double> @test_maskz_vaddpd(<
; SKX-LABEL: test_maskz_vaddpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.33]
+; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%x = fadd <8 x double> %i, %j
@@ -1071,10 +1071,10 @@ define double @test1(double %a, double %
; SKX-NEXT: jne .LBB64_1 # sched: [1:0.50]
; SKX-NEXT: jnp .LBB64_2 # sched: [1:0.50]
; SKX-NEXT: .LBB64_1: # %l1
-; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
; SKX-NEXT: .LBB64_2: # %l2
-; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%tobool = fcmp une double %a, %b
br i1 %tobool, label %l1, label %l2
@@ -1104,10 +1104,10 @@ define float @test2(float %a, float %b)
; SKX-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00]
; SKX-NEXT: jbe .LBB65_2 # sched: [1:0.50]
; SKX-NEXT: # %bb.1: # %l1
-; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
; SKX-NEXT: .LBB65_2: # %l2
-; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%tobool = fcmp olt float %a, %b
br i1 %tobool, label %l1, label %l2
@@ -1338,7 +1338,7 @@ define <16 x float> @sitof32(<16 x i32>
;
; SKX-LABEL: sitof32:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = sitofp <16 x i32> %a to <16 x float>
ret <16 x float> %b
@@ -1352,7 +1352,7 @@ define <8 x double> @sltof864(<8 x i64>
;
; SKX-LABEL: sltof864:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = sitofp <8 x i64> %a to <8 x double>
ret <8 x double> %b
@@ -1366,7 +1366,7 @@ define <4 x double> @slto4f64(<4 x i64>
;
; SKX-LABEL: slto4f64:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = sitofp <4 x i64> %a to <4 x double>
ret <4 x double> %b
@@ -1380,7 +1380,7 @@ define <2 x double> @slto2f64(<2 x i64>
;
; SKX-LABEL: slto2f64:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = sitofp <2 x i64> %a to <2 x double>
ret <2 x double> %b
@@ -1423,7 +1423,7 @@ define <4 x i64> @f64to4sl(<4 x double>
;
; SKX-LABEL: f64to4sl:
; SKX: # %bb.0:
-; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = fptosi <4 x double> %a to <4 x i64>
ret <4 x i64> %b
@@ -1483,7 +1483,7 @@ define <8 x double> @ulto8f64(<8 x i64>
;
; SKX-LABEL: ulto8f64:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = uitofp <8 x i64> %a to <8 x double>
ret <8 x double> %b
@@ -1498,8 +1498,8 @@ define <16 x double> @ulto16f64(<16 x i6
;
; SKX-LABEL: ulto16f64:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33]
-; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.33]
+; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50]
+; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = uitofp <16 x i64> %a to <16 x double>
ret <16 x double> %b
@@ -1513,7 +1513,7 @@ define <16 x i32> @f64to16si(<16 x float
;
; SKX-LABEL: f64to16si:
; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = fptosi <16 x float> %a to <16 x i32>
ret <16 x i32> %b
@@ -1527,7 +1527,7 @@ define <16 x i32> @f32to16ui(<16 x float
;
; SKX-LABEL: f32to16ui:
; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = fptoui <16 x float> %a to <16 x i32>
ret <16 x i32> %b
@@ -1543,7 +1543,7 @@ define <16 x i8> @f32to16uc(<16 x float>
;
; SKX-LABEL: f32to16uc:
; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -1560,7 +1560,7 @@ define <16 x i16> @f32to16us(<16 x float
;
; SKX-LABEL: f32to16us:
; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = fptoui <16 x float> %f to <16 x i16>
@@ -1575,7 +1575,7 @@ define <8 x i32> @f32to8ui(<8 x float> %
;
; SKX-LABEL: f32to8ui:
; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = fptoui <8 x float> %a to <8 x i32>
ret <8 x i32> %b
@@ -1589,7 +1589,7 @@ define <4 x i32> @f32to4ui(<4 x float> %
;
; SKX-LABEL: f32to4ui:
; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = fptoui <4 x float> %a to <4 x i32>
ret <4 x i32> %b
@@ -1684,7 +1684,7 @@ define <8 x double> @i32to8f64_mask(<8 x
; SKX-LABEL: i32to8f64_mask:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00]
+; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
; VLNOBW-LABEL: i32to8f64_mask:
; VLNOBW: # %bb.0:
@@ -1706,7 +1706,7 @@ define <8 x double> @sito8f64_maskz(<8 x
; SKX-LABEL: sito8f64_maskz:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00]
+; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
; VLNOBW-LABEL: sito8f64_maskz:
; VLNOBW: # %bb.0:
@@ -2094,7 +2094,7 @@ define <8 x double> @slto8f64(<8 x i64>
;
; SKX-LABEL: slto8f64:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = sitofp <8 x i64> %a to <8 x double>
ret <8 x double> %b
@@ -2109,8 +2109,8 @@ define <16 x double> @slto16f64(<16 x i6
;
; SKX-LABEL: slto16f64:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33]
-; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.33]
+; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
+; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = sitofp <16 x i64> %a to <16 x double>
ret <16 x double> %b
@@ -2158,7 +2158,7 @@ define <8 x double> @uito8f64_mask(<8 x
; SKX-LABEL: uito8f64_mask:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00]
+; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
; VLNOBW-LABEL: uito8f64_mask:
; VLNOBW: # %bb.0:
@@ -2180,7 +2180,7 @@ define <8 x double> @uito8f64_maskz(<8 x
; SKX-LABEL: uito8f64_maskz:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00]
+; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%1 = bitcast i8 %b to <8 x i1>
%2 = uitofp <8 x i32> %a to <8 x double>
@@ -2210,7 +2210,7 @@ define <16 x float> @uito16f32(<16 x i32
;
; SKX-LABEL: uito16f32:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = uitofp <16 x i32> %a to <16 x float>
ret <16 x float> %b
@@ -2238,7 +2238,7 @@ define <8 x float> @uito8f32(<8 x i32> %
;
; SKX-LABEL: uito8f32:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = uitofp <8 x i32> %a to <8 x float>
ret <8 x float> %b
@@ -2252,7 +2252,7 @@ define <4 x float> @uito4f32(<4 x i32> %
;
; SKX-LABEL: uito4f32:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = uitofp <4 x i32> %a to <4 x float>
ret <4 x float> %b
@@ -2266,7 +2266,7 @@ define i32 @fptosi(float %a) nounwind {
;
; SKX-LABEL: fptosi:
; SKX: # %bb.0:
-; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [7:1.00]
+; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%b = fptosi float %a to i32
ret i32 %b
@@ -2326,7 +2326,7 @@ define <16 x float> @sbto16f32(<16 x i32
; SKX: # %bb.0:
; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <16 x i32> %a, zeroinitializer
%1 = sitofp <16 x i1> %mask to <16 x float>
@@ -2343,7 +2343,7 @@ define <16 x float> @scto16f32(<16 x i8>
; SKX-LABEL: scto16f32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%1 = sitofp <16 x i8> %a to <16 x float>
ret <16 x float> %1
@@ -2359,7 +2359,7 @@ define <16 x float> @ssto16f32(<16 x i16
; SKX-LABEL: ssto16f32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%1 = sitofp <16 x i16> %a to <16 x float>
ret <16 x float> %1
@@ -2482,8 +2482,8 @@ define <8 x float> @sbto8f32(<8 x float>
; SKX-LABEL: sbto8f32:
; SKX: # %bb.0:
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
-; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <8 x float> %a, zeroinitializer
%1 = sitofp <8 x i1> %cmpres to <8 x float>
@@ -2501,8 +2501,8 @@ define <4 x float> @sbto4f32(<4 x float>
; SKX-LABEL: sbto4f32:
; SKX: # %bb.0:
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
-; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <4 x float> %a, zeroinitializer
%1 = sitofp <4 x i1> %cmpres to <4 x float>
@@ -2541,8 +2541,8 @@ define <2 x float> @sbto2f32(<2 x float>
; SKX-LABEL: sbto2f32:
; SKX: # %bb.0:
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
-; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <2 x float> %a, zeroinitializer
%1 = sitofp <2 x i1> %cmpres to <2 x float>
@@ -2561,9 +2561,9 @@ define <2 x double> @sbto2f64(<2 x doubl
; SKX-LABEL: sbto2f64:
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
-; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
%1 = sitofp <2 x i1> %cmpres to <2 x double>
@@ -2580,7 +2580,7 @@ define <16 x float> @ucto16f32(<16 x i8>
; SKX-LABEL: ucto16f32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = uitofp <16 x i8> %a to <16 x float>
ret <16 x float>%b
@@ -2614,7 +2614,7 @@ define <16 x float> @swto16f32(<16 x i16
; SKX-LABEL: swto16f32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = sitofp <16 x i16> %a to <16 x float>
ret <16 x float> %b
@@ -2686,7 +2686,7 @@ define <16 x float> @uwto16f32(<16 x i16
; SKX-LABEL: uwto16f32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = uitofp <16 x i16> %a to <16 x float>
ret <16 x float> %b
@@ -2736,7 +2736,7 @@ define <16 x float> @sito16f32(<16 x i32
;
; SKX-LABEL: sito16f32:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = sitofp <16 x i32> %a to <16 x float>
ret <16 x float> %b
@@ -2772,7 +2772,7 @@ define <16 x float> @usto16f32(<16 x i16
; SKX-LABEL: usto16f32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b = uitofp <16 x i16> %a to <16 x float>
ret <16 x float> %b
@@ -2791,8 +2791,8 @@ define <16 x float> @ubto16f32(<16 x i32
; SKX: # %bb.0:
; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
+; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <16 x i32> %a, zeroinitializer
%1 = uitofp <16 x i1> %mask to <16 x float>
@@ -2814,7 +2814,7 @@ define <16 x double> @ubto16f64(<16 x i3
; SKX: # %bb.0:
; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:0.50]
+; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
@@ -2945,7 +2945,7 @@ define <2 x double> @ubto2f64(<2 x i32>
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
-; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x double>
@@ -4253,7 +4253,7 @@ define <16 x i32> @zext_16i1_to_16xi32
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i16 %b to <16 x i1>
%c = zext <16 x i1> %a to <16 x i32>
@@ -4272,7 +4272,7 @@ define <8 x i64> @zext_8i1_to_8xi64(i8
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i8 %b to <8 x i1>
%c = zext <8 x i1> %a to <8 x i64>
@@ -4312,7 +4312,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
;
; SKX-LABEL: trunc_16i32_to_16i1:
; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:0.50]
+; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00]
; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
@@ -4502,7 +4502,7 @@ define <64 x i16> @test21(<64 x i16> %x
;
; SKX-LABEL: test21:
; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:0.50]
+; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00]
; SKX-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:1.00]
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
; SKX-NEXT: kshiftrq $32, %k1, %k1 # sched: [3:1.00]
@@ -4666,7 +4666,7 @@ define <32 x i16> @zext_32xi1_to_32xi16(
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp eq <32 x i16> %x, %y
%1 = zext <32 x i1> %mask to <32 x i16>
@@ -4763,8 +4763,8 @@ define <16 x float> @test_x86_fmadd_ps_z
;
; SKX-LABEL: test_x86_fmadd_ps_z:
; SKX: # %bb.0:
-; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
-; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
+; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%x = fmul <16 x float> %a0, %a1
%res = fadd <16 x float> %x, %a2
@@ -4780,8 +4780,8 @@ define <16 x float> @test_x86_fmsub_ps_z
;
; SKX-LABEL: test_x86_fmsub_ps_z:
; SKX: # %bb.0:
-; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
-; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
+; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%x = fmul <16 x float> %a0, %a1
%res = fsub <16 x float> %x, %a2
@@ -4797,8 +4797,8 @@ define <16 x float> @test_x86_fnmadd_ps_
;
; SKX-LABEL: test_x86_fnmadd_ps_z:
; SKX: # %bb.0:
-; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
-; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
+; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%x = fmul <16 x float> %a0, %a1
%res = fsub <16 x float> %a2, %x
@@ -4815,9 +4815,9 @@ define <16 x float> @test_x86_fnmsub_ps_
;
; SKX-LABEL: test_x86_fnmsub_ps_z:
; SKX: # %bb.0:
-; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%x = fmul <16 x float> %a0, %a1
%y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
@@ -4837,8 +4837,8 @@ define <8 x double> @test_x86_fmadd_pd_z
;
; SKX-LABEL: test_x86_fmadd_pd_z:
; SKX: # %bb.0:
-; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
-; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
+; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%x = fmul <8 x double> %a0, %a1
%res = fadd <8 x double> %x, %a2
@@ -4854,8 +4854,8 @@ define <8 x double> @test_x86_fmsub_pd_z
;
; SKX-LABEL: test_x86_fmsub_pd_z:
; SKX: # %bb.0:
-; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
-; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
+; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%x = fmul <8 x double> %a0, %a1
%res = fsub <8 x double> %x, %a2
@@ -4871,8 +4871,8 @@ define double @test_x86_fmsub_213(double
;
; SKX-LABEL: test_x86_fmsub_213:
; SKX: # %bb.0:
-; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
-; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%x = fmul double %a0, %a1
%res = fsub double %x, %a2
@@ -4888,7 +4888,7 @@ define double @test_x86_fmsub_213_m(doub
;
; SKX-LABEL: test_x86_fmsub_213_m:
; SKX: # %bb.0:
-; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a2 = load double , double *%a2_ptr
@@ -4907,7 +4907,7 @@ define double @test_x86_fmsub_231_m(doub
; SKX-LABEL: test_x86_fmsub_231_m:
; SKX: # %bb.0:
; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a2 = load double , double *%a2_ptr
%x = fmul double %a0, %a2
@@ -4925,7 +4925,7 @@ define <16 x float> @test231_br(<16 x fl
; SKX-LABEL: test231_br:
; SKX: # %bb.0:
; SKX-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
%b2 = fadd <16 x float> %b1, %a2
@@ -4941,7 +4941,7 @@ define <16 x float> @test213_br(<16 x fl
;
; SKX-LABEL: test213_br:
; SKX: # %bb.0:
-; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b1 = fmul <16 x float> %a1, %a2
@@ -4964,7 +4964,7 @@ define <16 x float> @test_x86_fmadd132_p
; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00]
; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50]
-; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.33]
+; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
%x = fmul <16 x float> %a0, %a2
@@ -4989,7 +4989,7 @@ define <16 x float> @test_x86_fmadd231_p
; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00]
; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.33]
+; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.50]
; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
@@ -5014,7 +5014,7 @@ define <16 x float> @test_x86_fmadd213_p
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00]
-; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50]
; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -5035,7 +5035,7 @@ define <16 x i32> @vpandd(<16 x i32> %a,
; SKX-LABEL: vpandd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
@@ -5055,7 +5055,7 @@ define <16 x i32> @vpandnd(<16 x i32> %a
; SKX-LABEL: vpandnd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
@@ -5077,7 +5077,7 @@ define <16 x i32> @vpord(<16 x i32> %a,
; SKX-LABEL: vpord:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
@@ -5097,7 +5097,7 @@ define <16 x i32> @vpxord(<16 x i32> %a,
; SKX-LABEL: vpxord:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
@@ -5117,7 +5117,7 @@ define <8 x i64> @vpandq(<8 x i64> %a, <
; SKX-LABEL: vpandq:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
@@ -5136,7 +5136,7 @@ define <8 x i64> @vpandnq(<8 x i64> %a,
; SKX-LABEL: vpandnq:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
@@ -5156,7 +5156,7 @@ define <8 x i64> @vporq(<8 x i64> %a, <8
; SKX-LABEL: vporq:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
@@ -5175,7 +5175,7 @@ define <8 x i64> @vpxorq(<8 x i64> %a, <
; SKX-LABEL: vpxorq:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
@@ -5192,7 +5192,7 @@ define <64 x i8> @and_v64i8(<64 x i8> %a
;
; SKX-LABEL: and_v64i8:
; SKX: # %bb.0:
-; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%res = and <64 x i8> %a, %b
ret <64 x i8> %res
@@ -5206,7 +5206,7 @@ define <64 x i8> @andn_v64i8(<64 x i8> %
;
; SKX-LABEL: andn_v64i8:
; SKX: # %bb.0:
-; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
@@ -5224,7 +5224,7 @@ define <64 x i8> @or_v64i8(<64 x i8> %a,
;
; SKX-LABEL: or_v64i8:
; SKX: # %bb.0:
-; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%res = or <64 x i8> %a, %b
ret <64 x i8> %res
@@ -5238,7 +5238,7 @@ define <64 x i8> @xor_v64i8(<64 x i8> %a
;
; SKX-LABEL: xor_v64i8:
; SKX: # %bb.0:
-; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%res = xor <64 x i8> %a, %b
ret <64 x i8> %res
@@ -5252,7 +5252,7 @@ define <32 x i16> @and_v32i16(<32 x i16>
;
; SKX-LABEL: and_v32i16:
; SKX: # %bb.0:
-; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%res = and <32 x i16> %a, %b
ret <32 x i16> %res
@@ -5266,7 +5266,7 @@ define <32 x i16> @andn_v32i16(<32 x i16
;
; SKX-LABEL: andn_v32i16:
; SKX: # %bb.0:
-; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -5282,7 +5282,7 @@ define <32 x i16> @or_v32i16(<32 x i16>
;
; SKX-LABEL: or_v32i16:
; SKX: # %bb.0:
-; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%res = or <32 x i16> %a, %b
ret <32 x i16> %res
@@ -5296,7 +5296,7 @@ define <32 x i16> @xor_v32i16(<32 x i16>
;
; SKX-LABEL: xor_v32i16:
; SKX: # %bb.0:
-; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%res = xor <32 x i16> %a, %b
ret <32 x i16> %res
@@ -5313,8 +5313,8 @@ define <16 x float> @masked_and_v16f32(<
; SKX-LABEL: masked_and_v16f32:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
-; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
+; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a1 = bitcast <16 x float> %a to <16 x i32>
%b1 = bitcast <16 x float> %b to <16 x i32>
@@ -5338,8 +5338,8 @@ define <16 x float> @masked_or_v16f32(<1
; SKX-LABEL: masked_or_v16f32:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
-; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
+; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a1 = bitcast <16 x float> %a to <16 x i32>
%b1 = bitcast <16 x float> %b to <16 x i32>
@@ -5363,8 +5363,8 @@ define <16 x float> @masked_xor_v16f32(<
; SKX-LABEL: masked_xor_v16f32:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
-; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
+; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a1 = bitcast <16 x float> %a to <16 x i32>
%b1 = bitcast <16 x float> %b to <16 x i32>
@@ -5388,8 +5388,8 @@ define <8 x double> @masked_and_v8f64(<8
; SKX-LABEL: masked_and_v8f64:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
-; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
+; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a1 = bitcast <8 x double> %a to <8 x i64>
%b1 = bitcast <8 x double> %b to <8 x i64>
@@ -5413,8 +5413,8 @@ define <8 x double> @masked_or_v8f64(<8
; SKX-LABEL: masked_or_v8f64:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
-; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
+; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a1 = bitcast <8 x double> %a to <8 x i64>
%b1 = bitcast <8 x double> %b to <8 x i64>
@@ -5438,8 +5438,8 @@ define <8 x double> @masked_xor_v8f64(<8
; SKX-LABEL: masked_xor_v8f64:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
-; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
+; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a1 = bitcast <8 x double> %a to <8 x i64>
%b1 = bitcast <8 x double> %b to <8 x i64>
@@ -5462,7 +5462,7 @@ define <8 x i64> @test_mm512_mask_and_ep
; SKX-LABEL: test_mm512_mask_and_epi32:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
+; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%and1.i.i = and <8 x i64> %__a, %__b
@@ -5484,7 +5484,7 @@ define <8 x i64> @test_mm512_mask_or_epi
; SKX-LABEL: test_mm512_mask_or_epi32:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
+; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%or1.i.i = or <8 x i64> %__a, %__b
@@ -5506,7 +5506,7 @@ define <8 x i64> @test_mm512_mask_xor_ep
; SKX-LABEL: test_mm512_mask_xor_epi32:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
+; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%xor1.i.i = xor <8 x i64> %__a, %__b
@@ -5528,7 +5528,7 @@ define <8 x double> @test_mm512_mask_xor
; SKX-LABEL: test_mm512_mask_xor_pd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
+; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@@ -5550,7 +5550,7 @@ define <8 x double> @test_mm512_maskz_xo
; SKX-LABEL: test_mm512_maskz_xor_pd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
+; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@@ -5572,7 +5572,7 @@ define <16 x float> @test_mm512_mask_xor
; SKX-LABEL: test_mm512_mask_xor_ps:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
+; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@@ -5594,7 +5594,7 @@ define <16 x float> @test_mm512_maskz_xo
; SKX-LABEL: test_mm512_maskz_xor_ps:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
+; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@@ -5616,7 +5616,7 @@ define <8 x double> @test_mm512_mask_or_
; SKX-LABEL: test_mm512_mask_or_pd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33]
+; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@@ -5638,7 +5638,7 @@ define <8 x double> @test_mm512_maskz_or
; SKX-LABEL: test_mm512_maskz_or_pd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33]
+; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@@ -5660,7 +5660,7 @@ define <16 x float> @test_mm512_mask_or_
; SKX-LABEL: test_mm512_mask_or_ps:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33]
+; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@@ -5682,7 +5682,7 @@ define <16 x float> @test_mm512_maskz_or
; SKX-LABEL: test_mm512_maskz_or_ps:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33]
+; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@@ -5704,7 +5704,7 @@ define <8 x double> @test_mm512_mask_and
; SKX-LABEL: test_mm512_mask_and_pd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33]
+; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@@ -5726,7 +5726,7 @@ define <8 x double> @test_mm512_maskz_an
; SKX-LABEL: test_mm512_maskz_and_pd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33]
+; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@@ -5748,7 +5748,7 @@ define <16 x float> @test_mm512_mask_and
; SKX-LABEL: test_mm512_mask_and_ps:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33]
+; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@@ -5770,7 +5770,7 @@ define <16 x float> @test_mm512_maskz_an
; SKX-LABEL: test_mm512_maskz_and_ps:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33]
+; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@@ -5792,7 +5792,7 @@ define <8 x double> @test_mm512_mask_and
; SKX-LABEL: test_mm512_mask_andnot_pd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
+; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@@ -5815,7 +5815,7 @@ define <8 x double> @test_mm512_maskz_an
; SKX-LABEL: test_mm512_maskz_andnot_pd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
+; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <8 x double> %__A to <8 x i64>
@@ -5838,7 +5838,7 @@ define <16 x float> @test_mm512_mask_and
; SKX-LABEL: test_mm512_mask_andnot_ps:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
+; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@@ -5861,7 +5861,7 @@ define <16 x float> @test_mm512_maskz_an
; SKX-LABEL: test_mm512_maskz_andnot_ps:
; SKX: # %bb.0: # %entry
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
+; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
%0 = bitcast <16 x float> %__A to <16 x i32>
@@ -7927,7 +7927,7 @@ define void @store_32i1_1(<32 x i1>* %a,
;
; SKX-LABEL: store_32i1_1:
; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00]
; SKX-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:1.00]
; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
@@ -7950,7 +7950,7 @@ define void @store_64i1(<64 x i1>* %a, <
;
; SKX-LABEL: store_64i1:
; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00]
; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00]
; SKX-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
@@ -8709,7 +8709,7 @@ define <16 x float> @broadcast_ss_spill(
; SKX: # %bb.0:
; SKX-NEXT: subq $24, %rsp # sched: [1:0.25]
; SKX-NEXT: .cfi_def_cfa_offset 32
-; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; SKX-NEXT: callq func_f32
; SKX-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
@@ -8741,7 +8741,7 @@ define <8 x double> @broadcast_sd_spill(
; SKX: # %bb.0:
; SKX-NEXT: subq $24, %rsp # sched: [1:0.25]
; SKX-NEXT: .cfi_def_cfa_offset 32
-; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; SKX-NEXT: callq func_f64
; SKX-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
Modified: llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll Mon Jun 11 07:37:53 2018
@@ -25,15 +25,15 @@ define void @test_vpopcntd(<16 x i32> %a
; ICELAKE: # %bb.0:
; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
; ICELAKE-NEXT: #APP
-; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:0.50]
-; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:0.50]
-; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:1.00]
+; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:1.00]
+; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
+; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:1.00]
+; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
+; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
+; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:1.00]
+; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00]
+; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00]
; ICELAKE-NEXT: #NO_APP
; ICELAKE-NEXT: vzeroupper # sched: [4:1.00]
; ICELAKE-NEXT: retq # sched: [7:1.00]
@@ -63,15 +63,15 @@ define void @test_vpopcntq(<8 x i64> %a0
; ICELAKE: # %bb.0:
; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
; ICELAKE-NEXT: #APP
-; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:0.50]
-; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:0.50]
-; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:1.00]
+; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:1.00]
+; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
+; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:1.00]
+; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:1.00]
+; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
+; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:1.00]
+; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00]
+; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00]
; ICELAKE-NEXT: #NO_APP
; ICELAKE-NEXT: vzeroupper # sched: [4:1.00]
; ICELAKE-NEXT: retq # sched: [7:1.00]
Modified: llvm/trunk/test/CodeGen/X86/fma-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-schedule.ll Mon Jun 11 07:37:53 2018
@@ -75,9 +75,9 @@ define void @test_vfmaddpd_128(<2 x doub
; SKX-LABEL: test_vfmaddpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
@@ -167,9 +167,9 @@ define void @test_vfmaddpd_256(<4 x doub
; SKX-LABEL: test_vfmaddpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.33]
-; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.33]
+; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
+; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
@@ -257,9 +257,9 @@ define void @test_vfmaddps_128(<4 x floa
; SKX-LABEL: test_vfmaddps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
@@ -349,9 +349,9 @@ define void @test_vfmaddps_256(<8 x floa
; SKX-LABEL: test_vfmaddps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.33]
-; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
+; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
@@ -439,9 +439,9 @@ define void @test_vfmaddsd_128(<2 x doub
; SKX-LABEL: test_vfmaddsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
@@ -527,9 +527,9 @@ define void @test_vfmaddss_128(<4 x floa
; SKX-LABEL: test_vfmaddss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
@@ -619,9 +619,9 @@ define void @test_vfmaddsubpd_128(<2 x d
; SKX-LABEL: test_vfmaddsubpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
@@ -711,9 +711,9 @@ define void @test_vfmaddsubpd_256(<4 x d
; SKX-LABEL: test_vfmaddsubpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.33]
-; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.33]
+; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
+; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
@@ -801,9 +801,9 @@ define void @test_vfmaddsubps_128(<4 x f
; SKX-LABEL: test_vfmaddsubps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
@@ -893,9 +893,9 @@ define void @test_vfmaddsubps_256(<8 x f
; SKX-LABEL: test_vfmaddsubps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.33]
-; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.33]
+; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
+; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
@@ -987,9 +987,9 @@ define void @test_vfmsubaddpd_128(<2 x d
; SKX-LABEL: test_vfmsubaddpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
@@ -1079,9 +1079,9 @@ define void @test_vfmsubaddpd_256(<4 x d
; SKX-LABEL: test_vfmsubaddpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.33]
-; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.33]
+; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
+; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
@@ -1169,9 +1169,9 @@ define void @test_vfmsubaddps_128(<4 x f
; SKX-LABEL: test_vfmsubaddps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
@@ -1261,9 +1261,9 @@ define void @test_vfmsubaddps_256(<8 x f
; SKX-LABEL: test_vfmsubaddps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.33]
-; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.33]
+; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
+; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
@@ -1355,9 +1355,9 @@ define void @test_vfmsubpd_128(<2 x doub
; SKX-LABEL: test_vfmsubpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
@@ -1447,9 +1447,9 @@ define void @test_vfmsubpd_256(<4 x doub
; SKX-LABEL: test_vfmsubpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.33]
-; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.33]
+; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
+; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
@@ -1537,9 +1537,9 @@ define void @test_vfmsubps_128(<4 x floa
; SKX-LABEL: test_vfmsubps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
@@ -1629,9 +1629,9 @@ define void @test_vfmsubps_256(<8 x floa
; SKX-LABEL: test_vfmsubps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.33]
-; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.33]
+; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
+; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
@@ -1719,9 +1719,9 @@ define void @test_vfmsubsd_128(<2 x doub
; SKX-LABEL: test_vfmsubsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
@@ -1807,9 +1807,9 @@ define void @test_vfmsubss_128(<4 x floa
; SKX-LABEL: test_vfmsubss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33]
-; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33]
+; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
+; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
@@ -1899,9 +1899,9 @@ define void @test_vfnmaddpd_128(<2 x dou
; SKX-LABEL: test_vfnmaddpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33]
+; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
@@ -1991,9 +1991,9 @@ define void @test_vfnmaddpd_256(<4 x dou
; SKX-LABEL: test_vfnmaddpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.33]
-; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.33]
+; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
+; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
@@ -2081,9 +2081,9 @@ define void @test_vfnmaddps_128(<4 x flo
; SKX-LABEL: test_vfnmaddps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33]
+; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
@@ -2173,9 +2173,9 @@ define void @test_vfnmaddps_256(<8 x flo
; SKX-LABEL: test_vfnmaddps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.33]
-; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.33]
+; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
+; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
@@ -2263,9 +2263,9 @@ define void @test_vfnmaddsd_128(<2 x dou
; SKX-LABEL: test_vfnmaddsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33]
+; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
@@ -2351,9 +2351,9 @@ define void @test_vfnmaddss_128(<4 x flo
; SKX-LABEL: test_vfnmaddss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33]
+; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
@@ -2443,9 +2443,9 @@ define void @test_vfnmsubpd_128(<2 x dou
; SKX-LABEL: test_vfnmsubpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33]
-; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33]
+; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
+; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
@@ -2535,9 +2535,9 @@ define void @test_vfnmsubpd_256(<4 x dou
; SKX-LABEL: test_vfnmsubpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.33]
-; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.33]
-; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.33]
+; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
+; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
+; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
@@ -2625,9 +2625,9 @@ define void @test_vfnmsubps_128(<4 x flo
; SKX-LABEL: test_vfnmsubps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33]
-; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33]
+; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
+; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
@@ -2717,9 +2717,9 @@ define void @test_vfnmsubps_256(<8 x flo
; SKX-LABEL: test_vfnmsubps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.33]
-; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.33]
-; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.33]
+; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
+; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
+; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
@@ -2807,9 +2807,9 @@ define void @test_vfnmsubsd_128(<2 x dou
; SKX-LABEL: test_vfnmsubsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33]
-; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33]
+; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
+; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
@@ -2895,9 +2895,9 @@ define void @test_vfnmsubss_128(<4 x flo
; SKX-LABEL: test_vfnmsubss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33]
-; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33]
+; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
+; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
Modified: llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-schedule.ll Mon Jun 11 07:37:53 2018
@@ -152,9 +152,9 @@ define <2 x double> @test_cvtpi2pd(x86_m
;
; SKX-LABEL: test_cvtpi2pd:
; SKX: # %bb.0:
-; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:0.50]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtpi2pd:
@@ -232,7 +232,7 @@ define <4 x float> @test_cvtpi2ps(x86_mm
; SKX: # %bb.0:
; SKX-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtpi2ps:
Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll Mon Jun 11 07:37:53 2018
@@ -153,7 +153,7 @@ define float @f32_one_step(float %x) #1
; SKX: # %bb.0:
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 1.0, %x
ret float %div
@@ -271,10 +271,10 @@ define float @f32_two_step(float %x) #2
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33]
+; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 1.0, %x
ret float %div
@@ -418,7 +418,7 @@ define <4 x float> @v4f32_one_step(<4 x
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
@@ -536,10 +536,10 @@ define <4 x float> @v4f32_two_step(<4 x
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
@@ -693,7 +693,7 @@ define <8 x float> @v8f32_one_step(<8 x
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
@@ -824,10 +824,10 @@ define <8 x float> @v8f32_two_step(<8 x
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.33]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
@@ -1031,9 +1031,9 @@ define <16 x float> @v16f32_one_step(<16
;
; SKX-LABEL: v16f32_one_step:
; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00]
+; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
@@ -1235,13 +1235,13 @@ define <16 x float> @v16f32_two_step(<16
;
; SKX-LABEL: v16f32_two_step:
; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00]
+; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [8:0.50]
; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.33]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll Mon Jun 11 07:37:53 2018
@@ -154,7 +154,7 @@ define float @f32_one_step_2(float %x) #
; SKX: # %bb.0:
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 3456.0, %x
@@ -254,9 +254,9 @@ define float @f32_one_step_2_divs(float
; SKX: # %bb.0:
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
-; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 3456.0, %x
%div2 = fdiv fast float %div, %x
@@ -383,10 +383,10 @@ define float @f32_two_step_2(float %x) #
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33]
+; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 6789.0, %x
@@ -480,7 +480,7 @@ define <4 x float> @v4f32_one_step2(<4 x
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
@@ -582,9 +582,9 @@ define <4 x float> @v4f32_one_step_2_div
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50]
-; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
%div2 = fdiv fast <4 x float> %div, %x
@@ -711,10 +711,10 @@ define <4 x float> @v4f32_two_step2(<4 x
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
@@ -816,7 +816,7 @@ define <8 x float> @v8f32_one_step2(<8 x
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
@@ -927,9 +927,9 @@ define <8 x float> @v8f32_one_step_2_div
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [11:0.50]
-; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
%div2 = fdiv fast <8 x float> %div, %x
@@ -1070,10 +1070,10 @@ define <8 x float> @v8f32_two_step2(<8 x
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.33]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
@@ -1331,9 +1331,9 @@ define <16 x float> @v16f32_one_step2(<1
;
; SKX-LABEL: v16f32_one_step2:
; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00]
+; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
@@ -1498,11 +1498,11 @@ define <16 x float> @v16f32_one_step_2_d
;
; SKX-LABEL: v16f32_one_step_2_divs:
; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00]
+; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33]
+; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [11:0.50]
-; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
%div2 = fdiv fast <16 x float> %div, %x
@@ -1721,13 +1721,13 @@ define <16 x float> @v16f32_two_step2(<1
;
; SKX-LABEL: v16f32_two_step2:
; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00]
+; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [8:0.50]
; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.33]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.33]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50]
+; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
@@ -1786,7 +1786,7 @@ define <16 x float> @v16f32_no_step(<16
;
; SKX-LABEL: v16f32_no_step:
; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [9:2.00]
+; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
@@ -1861,7 +1861,7 @@ define <16 x float> @v16f32_no_step2(<16
;
; SKX-LABEL: v16f32_no_step2:
; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [9:2.00]
+; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00]
; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll Mon Jun 11 07:37:53 2018
@@ -8376,13 +8376,13 @@ define void @test_nop(i16 %a0, i32 %a1,
; SKX-LABEL: test_nop:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: nop # sched: [1:0.25]
-; SKX-NEXT: nopw %di # sched: [1:0.25]
-; SKX-NEXT: nopw (%rcx) # sched: [1:0.25]
-; SKX-NEXT: nopl %esi # sched: [1:0.25]
-; SKX-NEXT: nopl (%r8) # sched: [1:0.25]
-; SKX-NEXT: nopq %rdx # sched: [1:0.25]
-; SKX-NEXT: nopq (%r9) # sched: [1:0.25]
+; SKX-NEXT: nop # sched: [1:0.17]
+; SKX-NEXT: nopw %di # sched: [1:0.17]
+; SKX-NEXT: nopw (%rcx) # sched: [1:0.17]
+; SKX-NEXT: nopl %esi # sched: [1:0.17]
+; SKX-NEXT: nopl (%r8) # sched: [1:0.17]
+; SKX-NEXT: nopq %rdx # sched: [1:0.17]
+; SKX-NEXT: nopq (%r9) # sched: [1:0.17]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
Modified: llvm/trunk/test/CodeGen/X86/sha-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sha-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sha-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sha-schedule.ll Mon Jun 11 07:37:53 2018
@@ -23,7 +23,7 @@ define <4 x i32> @test_sha1msg1(<4 x i32
;
; CANNONLAKE-LABEL: test_sha1msg1:
; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@@ -54,7 +54,7 @@ define <4 x i32> @test_sha1msg2(<4 x i32
;
; CANNONLAKE-LABEL: test_sha1msg2:
; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@@ -85,7 +85,7 @@ define <4 x i32> @test_sha1nexte(<4 x i3
;
; CANNONLAKE-LABEL: test_sha1nexte:
; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@@ -116,7 +116,7 @@ define <4 x i32> @test_sha1rnds4(<4 x i3
;
; CANNONLAKE-LABEL: test_sha1rnds4:
; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@@ -151,7 +151,7 @@ define <4 x i32> @test_sha256msg1(<4 x i
;
; CANNONLAKE-LABEL: test_sha256msg1:
; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@@ -182,7 +182,7 @@ define <4 x i32> @test_sha256msg2(<4 x i
;
; CANNONLAKE-LABEL: test_sha256msg2:
; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@@ -221,7 +221,7 @@ define <4 x i32> @test_sha256rnds2(<4 x
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.33]
; CANNONLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.33]
+; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50]
; CANNONLAKE-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.33]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Mon Jun 11 07:37:53 2018
@@ -90,13 +90,13 @@ define <4 x float> @test_addps(<4 x floa
;
; SKX-SSE-LABEL: test_addps:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addps:
; SKX: # %bb.0:
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -198,13 +198,13 @@ define float @test_addss(float %a0, floa
;
; SKX-SSE-LABEL: test_addss:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addss:
; SKX: # %bb.0:
-; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -551,14 +551,14 @@ define <4 x float> @test_cmpps(<4 x floa
;
; SKX-SSE-LABEL: test_cmpps:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmpps:
; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -669,13 +669,13 @@ define float @test_cmpss(float %a0, floa
;
; SKX-SSE-LABEL: test_cmpss:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmpss:
; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -1041,14 +1041,14 @@ define float @test_cvtsi2ss(i32 %a0, i32
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsi2ss:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2ss:
@@ -1167,14 +1167,14 @@ define float @test_cvtsi2ssq(i64 %a0, i6
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00]
; SKX-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsi2ssq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2ssq:
@@ -1420,14 +1420,14 @@ define i64 @test_cvtss2siq(float %a0, fl
;
; SKX-SSE-LABEL: test_cvtss2siq:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [7:1.00]
; SKX-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00]
; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtss2siq:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [7:1.00]
; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00]
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -1549,14 +1549,14 @@ define i32 @test_cvttss2si(float %a0, fl
;
; SKX-SSE-LABEL: test_cvttss2si:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00]
+; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [6:1.00]
; SKX-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00]
; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvttss2si:
; SKX: # %bb.0:
-; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
+; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [6:1.00]
; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00]
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -2116,13 +2116,13 @@ define <4 x float> @test_maxps(<4 x floa
;
; SKX-SSE-LABEL: test_maxps:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_maxps:
; SKX: # %bb.0:
-; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2225,13 +2225,13 @@ define <4 x float> @test_maxss(<4 x floa
;
; SKX-SSE-LABEL: test_maxss:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_maxss:
; SKX: # %bb.0:
-; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2334,13 +2334,13 @@ define <4 x float> @test_minps(<4 x floa
;
; SKX-SSE-LABEL: test_minps:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_minps:
; SKX: # %bb.0:
-; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2443,13 +2443,13 @@ define <4 x float> @test_minss(<4 x floa
;
; SKX-SSE-LABEL: test_minss:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_minss:
; SKX: # %bb.0:
-; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2564,14 +2564,14 @@ define void @test_movaps(<4 x float> *%a
; SKX-SSE-LABEL: test_movaps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movaps:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2795,7 +2795,7 @@ define void @test_movhps(<4 x float> %a0
; SKX-SSE-LABEL: test_movhps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
@@ -2803,7 +2803,7 @@ define void @test_movhps(<4 x float> %a0
; SKX-LABEL: test_movhps:
; SKX: # %bb.0:
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2918,13 +2918,13 @@ define <4 x float> @test_movlhps(<4 x fl
; SKX-SSE-LABEL: test_movlhps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movlhps:
; SKX: # %bb.0:
; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movlhps:
@@ -3036,14 +3036,14 @@ define void @test_movlps(<4 x float> %a0
; SKX-SSE-LABEL: test_movlps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movlps:
; SKX: # %bb.0:
; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3352,14 +3352,14 @@ define void @test_movss_mem(float* %a0,
; SKX-SSE-LABEL: test_movss_mem:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movss_mem:
; SKX: # %bb.0:
; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3572,14 +3572,14 @@ define void @test_movups(<4 x float> *%a
; SKX-SSE-LABEL: test_movups:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movups:
; SKX: # %bb.0:
; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3685,13 +3685,13 @@ define <4 x float> @test_mulps(<4 x floa
;
; SKX-SSE-LABEL: test_mulps:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_mulps:
; SKX: # %bb.0:
-; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3793,13 +3793,13 @@ define float @test_mulss(float %a0, floa
;
; SKX-SSE-LABEL: test_mulss:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_mulss:
; SKX: # %bb.0:
-; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -4206,14 +4206,14 @@ define <4 x float> @test_rcpps(<4 x floa
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_rcpps:
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_rcpps:
@@ -4347,7 +4347,7 @@ define <4 x float> @test_rcpss(float %a0
; SKX-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00]
; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_rcpss:
@@ -4355,7 +4355,7 @@ define <4 x float> @test_rcpss(float %a0
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_rcpss:
@@ -4483,14 +4483,14 @@ define <4 x float> @test_rsqrtps(<4 x fl
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_rsqrtps:
; SKX: # %bb.0:
; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_rsqrtps:
@@ -4624,7 +4624,7 @@ define <4 x float> @test_rsqrtss(float %
; SKX-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00]
; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_rsqrtss:
@@ -4632,7 +4632,7 @@ define <4 x float> @test_rsqrtss(float %
; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_rsqrtss:
@@ -4854,14 +4854,14 @@ define <4 x float> @test_shufps(<4 x flo
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
; SKX-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_shufps:
; SKX: # %bb.0:
; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
; SKX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_shufps:
@@ -4981,14 +4981,14 @@ define <4 x float> @test_sqrtps(<4 x flo
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:3.00]
; SKX-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:3.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_sqrtps:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:3.00]
; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:3.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_sqrtps:
@@ -5122,7 +5122,7 @@ define <4 x float> @test_sqrtss(<4 x flo
; SKX-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:3.00]
; SKX-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
; SKX-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:3.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_sqrtss:
@@ -5130,7 +5130,7 @@ define <4 x float> @test_sqrtss(<4 x flo
; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_sqrtss:
@@ -5351,13 +5351,13 @@ define <4 x float> @test_subps(<4 x floa
;
; SKX-SSE-LABEL: test_subps:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_subps:
; SKX: # %bb.0:
-; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -5459,13 +5459,13 @@ define float @test_subss(float %a0, floa
;
; SKX-SSE-LABEL: test_subss:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_subss:
; SKX: # %bb.0:
-; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -5826,14 +5826,14 @@ define <4 x float> @test_unpckhps(<4 x f
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_unpckhps:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_unpckhps:
@@ -5952,14 +5952,14 @@ define <4 x float> @test_unpcklps(<4 x f
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_unpcklps:
; SKX: # %bb.0:
; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SKX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_unpcklps:
@@ -6210,7 +6210,7 @@ define <4 x float> @test_fnop() nounwind
; SKX-SSE-LABEL: test_fnop:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: #APP
-; SKX-SSE-NEXT: nop # sched: [1:0.25]
+; SKX-SSE-NEXT: nop # sched: [1:0.17]
; SKX-SSE-NEXT: #NO_APP
; SKX-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
@@ -6218,7 +6218,7 @@ define <4 x float> @test_fnop() nounwind
; SKX-LABEL: test_fnop:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: nop # sched: [1:0.25]
+; SKX-NEXT: nop # sched: [1:0.17]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Mon Jun 11 07:37:53 2018
@@ -88,13 +88,13 @@ define <2 x double> @test_addpd(<2 x dou
;
; SKX-SSE-LABEL: test_addpd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addpd:
; SKX: # %bb.0:
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -196,13 +196,13 @@ define double @test_addsd(double %a0, do
;
; SKX-SSE-LABEL: test_addsd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addsd:
; SKX: # %bb.0:
-; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -317,14 +317,14 @@ define <2 x double> @test_andpd(<2 x dou
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_andpd:
; SKX: # %bb.0:
; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_andpd:
@@ -447,14 +447,14 @@ define <2 x double> @test_andnotpd(<2 x
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_andnotpd:
; SKX: # %bb.0:
; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_andnotpd:
@@ -673,14 +673,14 @@ define <2 x double> @test_cmppd(<2 x dou
;
; SKX-SSE-LABEL: test_cmppd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmppd:
; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -790,13 +790,13 @@ define double @test_cmpsd(double %a0, do
;
; SKX-SSE-LABEL: test_cmpsd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmpsd:
; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -1162,16 +1162,16 @@ define <2 x double> @test_cvtdq2pd(<4 x
;
; SKX-SSE-LABEL: test_cvtdq2pd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtdq2pd:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtdq2pd:
@@ -1291,16 +1291,16 @@ define <4 x float> @test_cvtdq2ps(<4 x i
;
; SKX-SSE-LABEL: test_cvtdq2ps:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtdq2ps:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtdq2ps:
@@ -1427,7 +1427,7 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
; SKX-LABEL: test_cvtpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -1550,14 +1550,14 @@ define <4 x float> @test_cvtpd2ps(<2 x d
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtpd2ps:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtpd2ps:
@@ -1676,14 +1676,14 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
;
; SKX-SSE-LABEL: test_cvtps2dq:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtps2dq:
; SKX: # %bb.0:
-; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -1806,14 +1806,14 @@ define <2 x double> @test_cvtps2pd(<4 x
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtps2pd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtps2pd:
@@ -2205,7 +2205,7 @@ define float @test_cvtsd2ss(double %a0,
; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsd2ss:
@@ -2213,7 +2213,7 @@ define float @test_cvtsd2ss(double %a0,
; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsd2ss:
@@ -2336,14 +2336,14 @@ define double @test_cvtsi2sd(i32 %a0, i3
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsi2sd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2sd:
@@ -2462,14 +2462,14 @@ define double @test_cvtsi2sdq(i64 %a0, i
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsi2sdq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2sdq:
@@ -2603,7 +2603,7 @@ define double @test_cvtss2sd(float %a0,
; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtss2sd:
@@ -2611,7 +2611,7 @@ define double @test_cvtss2sd(float %a0,
; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtss2sd:
@@ -2742,7 +2742,7 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
; SKX-LABEL: test_cvttpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -2863,14 +2863,14 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
;
; SKX-SSE-LABEL: test_cvttps2dq:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvttps2dq:
; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -3732,13 +3732,13 @@ define <2 x double> @test_maxpd(<2 x dou
;
; SKX-SSE-LABEL: test_maxpd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_maxpd:
; SKX: # %bb.0:
-; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3841,13 +3841,13 @@ define <2 x double> @test_maxsd(<2 x dou
;
; SKX-SSE-LABEL: test_maxsd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_maxsd:
; SKX: # %bb.0:
-; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -3950,13 +3950,13 @@ define <2 x double> @test_minpd(<2 x dou
;
; SKX-SSE-LABEL: test_minpd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_minpd:
; SKX: # %bb.0:
-; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -4059,13 +4059,13 @@ define <2 x double> @test_minsd(<2 x dou
;
; SKX-SSE-LABEL: test_minsd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_minsd:
; SKX: # %bb.0:
-; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -4180,14 +4180,14 @@ define void @test_movapd(<2 x double> *%
; SKX-SSE-LABEL: test_movapd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movapd:
; SKX: # %bb.0:
; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -4917,14 +4917,14 @@ define void @test_movhpd(<2 x double> %a
; SKX-SSE-LABEL: test_movhpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movhpd:
; SKX: # %bb.0:
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -5045,14 +5045,14 @@ define void @test_movlpd(<2 x double> %a
; SKX-SSE-LABEL: test_movlpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movlpd:
; SKX: # %bb.0:
; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -5362,13 +5362,13 @@ define void @test_movntpd(<2 x double> %
;
; SKX-SSE-LABEL: test_movntpd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movntpd:
; SKX: # %bb.0:
-; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -5717,14 +5717,14 @@ define void @test_movsd_mem(double* %a0,
; SKX-SSE-LABEL: test_movsd_mem:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movsd_mem:
; SKX: # %bb.0:
; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -5945,14 +5945,14 @@ define void @test_movupd(<2 x double> *%
; SKX-SSE-LABEL: test_movupd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movupd:
; SKX: # %bb.0:
; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -6058,13 +6058,13 @@ define <2 x double> @test_mulpd(<2 x dou
;
; SKX-SSE-LABEL: test_mulpd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_mulpd:
; SKX: # %bb.0:
-; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -6166,13 +6166,13 @@ define double @test_mulsd(double %a0, do
;
; SKX-SSE-LABEL: test_mulsd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_mulsd:
; SKX: # %bb.0:
-; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -6287,14 +6287,14 @@ define <2 x double> @test_orpd(<2 x doub
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_orpd:
; SKX: # %bb.0:
; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_orpd:
@@ -9176,13 +9176,13 @@ define <4 x i32> @test_pmaddwd(<8 x i16>
;
; SKX-SSE-LABEL: test_pmaddwd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmaddwd:
; SKX: # %bb.0:
-; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -9830,13 +9830,13 @@ define <8 x i16> @test_pmulhuw(<8 x i16>
;
; SKX-SSE-LABEL: test_pmulhuw:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulhuw:
; SKX: # %bb.0:
-; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -9939,13 +9939,13 @@ define <8 x i16> @test_pmulhw(<8 x i16>
;
; SKX-SSE-LABEL: test_pmulhw:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulhw:
; SKX: # %bb.0:
-; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -10048,13 +10048,13 @@ define <8 x i16> @test_pmullw(<8 x i16>
;
; SKX-SSE-LABEL: test_pmullw:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmullw:
; SKX: # %bb.0:
-; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -10156,13 +10156,13 @@ define <2 x i64> @test_pmuludq(<4 x i32>
;
; SKX-SSE-LABEL: test_pmuludq:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmuludq:
; SKX: # %bb.0:
-; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -14094,14 +14094,14 @@ define <2 x double> @test_shufpd(<2 x do
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
; SKX-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_shufpd:
; SKX: # %bb.0:
; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_shufpd:
@@ -14221,14 +14221,14 @@ define <2 x double> @test_sqrtpd(<2 x do
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00]
; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_sqrtpd:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:6.00]
; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_sqrtpd:
@@ -14362,7 +14362,7 @@ define <2 x double> @test_sqrtsd(<2 x do
; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:6.00]
; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_sqrtsd:
@@ -14370,7 +14370,7 @@ define <2 x double> @test_sqrtsd(<2 x do
; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50]
; SKX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_sqrtsd:
@@ -14481,13 +14481,13 @@ define <2 x double> @test_subpd(<2 x dou
;
; SKX-SSE-LABEL: test_subpd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_subpd:
; SKX: # %bb.0:
-; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -14589,13 +14589,13 @@ define double @test_subsd(double %a0, do
;
; SKX-SSE-LABEL: test_subsd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_subsd:
; SKX: # %bb.0:
-; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -14956,14 +14956,14 @@ define <2 x double> @test_unpckhpd(<2 x
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_unpckhpd:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_unpckhpd:
@@ -15097,7 +15097,7 @@ define <2 x double> @test_unpcklpd(<2 x
; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SKX-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33]
; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
@@ -15105,7 +15105,7 @@ define <2 x double> @test_unpcklpd(<2 x
; SKX: # %bb.0:
; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_unpcklpd:
@@ -15228,14 +15228,14 @@ define <2 x double> @test_xorpd(<2 x dou
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_xorpd:
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_xorpd:
Modified: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll Mon Jun 11 07:37:53 2018
@@ -88,13 +88,13 @@ define <2 x double> @test_addsubpd(<2 x
;
; SKX-SSE-LABEL: test_addsubpd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addsubpd:
; SKX: # %bb.0:
-; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -197,13 +197,13 @@ define <4 x float> @test_addsubps(<4 x f
;
; SKX-SSE-LABEL: test_addsubps:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addsubps:
; SKX: # %bb.0:
-; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -972,14 +972,14 @@ define <2 x double> @test_movddup(<2 x d
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
-; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movddup:
; SKX: # %bb.0:
; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
-; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movddup:
@@ -1099,14 +1099,14 @@ define <4 x float> @test_movshdup(<4 x f
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
; SKX-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movshdup:
; SKX: # %bb.0:
; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movshdup:
@@ -1226,14 +1226,14 @@ define <4 x float> @test_movsldup(<4 x f
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movsldup:
; SKX: # %bb.0:
; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movsldup:
Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Mon Jun 11 07:37:53 2018
@@ -92,14 +92,14 @@ define <2 x double> @test_blendpd(<2 x d
; SKX-SSE-LABEL: test_blendpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_blendpd:
; SKX: # %bb.0:
; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -212,14 +212,14 @@ define <4 x float> @test_blendps(<4 x fl
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; SKX-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_blendps:
; SKX: # %bb.0:
; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; SKX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_blendps:
@@ -2065,14 +2065,14 @@ define <8 x i16> @test_phminposuw(<8 x i
;
; SKX-SSE-LABEL: test_phminposuw:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00]
+; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_phminposuw:
; SKX: # %bb.0:
-; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00]
+; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_phminposuw:
@@ -4767,13 +4767,13 @@ define <2 x i64> @test_pmuldq(<4 x i32>
;
; SKX-SSE-LABEL: test_pmuldq:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmuldq:
; SKX: # %bb.0:
-; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -4871,14 +4871,14 @@ define <4 x i32> @test_pmulld(<4 x i32>
;
; SKX-SSE-LABEL: test_pmulld:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:0.67]
-; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:0.67]
+; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00]
+; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulld:
; SKX: # %bb.0:
-; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:0.67]
-; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:0.67]
+; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00]
+; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pmulld:
@@ -5153,16 +5153,16 @@ define <2 x double> @test_roundpd(<2 x d
;
; SKX-SSE-LABEL: test_roundpd:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:0.67]
-; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:0.67]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00]
+; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundpd:
; SKX: # %bb.0:
-; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67]
-; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:0.67]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00]
+; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundpd:
@@ -5275,16 +5275,16 @@ define <4 x float> @test_roundps(<4 x fl
;
; SKX-SSE-LABEL: test_roundps:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:0.67]
-; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:0.67]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00]
+; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundps:
; SKX: # %bb.0:
-; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67]
-; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:0.67]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00]
+; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:1.00]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundps:
@@ -5402,16 +5402,16 @@ define <2 x double> @test_roundsd(<2 x d
; SKX-SSE-LABEL: test_roundsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:0.67]
-; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:0.67]
-; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00]
+; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundsd:
; SKX: # %bb.0:
-; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
-; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundsd:
@@ -5531,16 +5531,16 @@ define <4 x float> @test_roundss(<4 x fl
; SKX-SSE-LABEL: test_roundss:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:0.67]
-; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:0.67]
-; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00]
+; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundss:
; SKX: # %bb.0:
-; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
-; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
-; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
+; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundss:
Modified: llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll Mon Jun 11 07:37:53 2018
@@ -1249,13 +1249,13 @@ define <8 x i16> @test_pmaddubsw(<16 x i
;
; SKX-SSE-LABEL: test_pmaddubsw:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmaddubsw:
; SKX: # %bb.0:
-; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@@ -1359,13 +1359,13 @@ define <8 x i16> @test_pmulhrsw(<8 x i16
;
; SKX-SSE-LABEL: test_pmulhrsw:
; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulhrsw:
; SKX: # %bb.0:
-; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s Mon Jun 11 07:37:53 2018
@@ -1018,25 +1018,25 @@ vzeroupper
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 4 0.33 vaddpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vaddpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vaddpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vaddpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vaddpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vaddpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vaddps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vaddps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vaddsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vaddsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vaddsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vaddss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vaddss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vaddss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vaddsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vaddsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vaddsubpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vaddsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vaddsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vaddsubpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vaddsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vaddsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vaddsubps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vaddsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vaddsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vaddsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vaesdec %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 1.00 * vaesdec (%rax), %xmm1, %xmm2
@@ -1086,41 +1086,41 @@ vzeroupper
# CHECK-NEXT: 1 7 0.50 * vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: 1 6 0.50 * vbroadcastss (%rax), %xmm2
# CHECK-NEXT: 1 7 0.50 * vbroadcastss (%rax), %ymm2
-# CHECK-NEXT: 1 4 0.33 vcmppd $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vcmppd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcmppd $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vcmppd $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vcmppd $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vcmppd $0, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vcmpps $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vcmpps $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcmpps $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vcmpps $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vcmpps $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vcmpps $0, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vcmpsd $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vcmpsd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vcmpsd $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vcmpss $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vcmpss $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vcmpss $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vcomisd %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * vcomisd (%rax), %xmm1
# CHECK-NEXT: 1 2 1.00 vcomiss %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * vcomiss (%rax), %xmm1
-# CHECK-NEXT: 2 5 1.00 vcvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 vcvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 3 11 1.00 * vcvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 vcvtdq2pd %xmm0, %ymm2
# CHECK-NEXT: 3 13 1.00 * vcvtdq2pd (%rax), %ymm2
-# CHECK-NEXT: 1 4 0.33 vcvtdq2ps %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcvtdq2ps (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 vcvtdq2ps %ymm0, %ymm2
+# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %ymm2
# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqx (%rax), %xmm2
+# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 vcvtps2dq %xmm0, %xmm2
+# CHECK-NEXT: 3 8 1.00 * vcvtpd2psy (%rax), %xmm2
+# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 vcvtps2dq %ymm0, %ymm2
+# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm2
# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * vcvtps2pd (%rax), %xmm2
@@ -1143,22 +1143,22 @@ vzeroupper
# CHECK-NEXT: 2 5 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcvtss2sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %ecx
-# CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %rcx
+# CHECK-NEXT: 3 7 1.00 vcvtss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %ecx
# CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %rcx
# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqx (%rax), %xmm2
+# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 vcvttps2dq %ymm0, %ymm2
+# CHECK-NEXT: 1 3 0.50 vcvttps2dq %ymm0, %ymm2
# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm2
# CHECK-NEXT: 2 6 1.00 vcvttsd2si %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 vcvttsd2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %ecx
# CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %rcx
-# CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %ecx
+# CHECK-NEXT: 2 6 1.00 vcvttss2si %xmm0, %ecx
# CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %ecx
# CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %rcx
@@ -1216,29 +1216,29 @@ vzeroupper
# CHECK-NEXT: 2 8 0.50 * vmaskmovps (%rax), %ymm0, %ymm2
# CHECK-NEXT: 2 2 1.00 * * vmaskmovps %xmm0, %xmm1, (%rax)
# CHECK-NEXT: 2 2 1.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
-# CHECK-NEXT: 1 4 0.33 vmaxpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vmaxpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vmaxpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vmaxpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vmaxpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vmaxps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vmaxps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vmaxps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vmaxps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vmaxps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vmaxps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vmaxsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vmaxsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vmaxsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vmaxss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vmaxss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vmaxss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vminpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vminpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vminpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vminpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vminpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vminpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vminps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vminps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vminps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vminps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vminps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vminps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vminsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vminsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vminsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vminss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vminss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vminss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vmovapd %xmm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * vmovapd %xmm0, (%rax)
@@ -1327,17 +1327,17 @@ vzeroupper
# CHECK-NEXT: 1 7 0.50 * vmovups (%rax), %ymm2
# CHECK-NEXT: 2 4 2.00 vmpsadbw $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 10 2.00 * vmpsadbw $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vmulpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vmulpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vmulpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vmulpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vmulpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vmulps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vmulps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vmulsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vmulsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vmulsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vmulss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vmulss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vmulss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vorpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.50 * vorpd (%rax), %xmm1, %xmm2
@@ -1441,8 +1441,8 @@ vzeroupper
# CHECK-NEXT: 4 9 2.00 * vphaddsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 3 2.00 vphaddw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 9 2.00 * vphaddw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vphminposuw %xmm0, %xmm2
-# CHECK-NEXT: 2 10 0.50 * vphminposuw (%rax), %xmm2
+# CHECK-NEXT: 1 4 1.00 vphminposuw %xmm0, %xmm2
+# CHECK-NEXT: 2 10 1.00 * vphminposuw (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 vphsubd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 9 2.00 * vphsubd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 3 2.00 vphsubsw %xmm0, %xmm1, %xmm2
@@ -1457,9 +1457,9 @@ vzeroupper
# CHECK-NEXT: 2 6 1.00 * vpinsrq $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 2 2.00 vpinsrw $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 1.00 * vpinsrw $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vpmaddubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vpmaddubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmaddubsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vpmaddwd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vpmaddwd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmaddwd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpmaxsb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.50 * vpmaxsb (%rax), %xmm1, %xmm2
@@ -1510,19 +1510,19 @@ vzeroupper
# CHECK-NEXT: 2 6 1.00 * vpmovzxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 vpmovzxwq %xmm0, %xmm2
# CHECK-NEXT: 2 6 1.00 * vpmovzxwq (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 vpmuldq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vpmuldq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmuldq (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vpmulhrsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vpmulhrsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmulhrsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vpmulhuw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vpmulhuw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmulhuw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vpmulhw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vpmulhw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmulhw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 10 0.67 vpmulld %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 3 16 0.67 * vpmulld (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vpmullw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 10 1.00 vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 16 1.00 * vpmulld (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vpmullw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmullw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vpmuludq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vpmuludq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmuludq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vpor %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.50 * vpor (%rax), %xmm1, %xmm2
@@ -1612,18 +1612,18 @@ vzeroupper
# CHECK-NEXT: 2 11 1.00 * vrcpps (%rax), %ymm2
# CHECK-NEXT: 1 4 1.00 vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 1.00 * vrcpss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 8 0.67 vroundpd $1, %xmm0, %xmm2
-# CHECK-NEXT: 3 14 0.67 * vroundpd $1, (%rax), %xmm2
-# CHECK-NEXT: 2 8 0.67 vroundpd $1, %ymm0, %ymm2
-# CHECK-NEXT: 3 15 0.67 * vroundpd $1, (%rax), %ymm2
-# CHECK-NEXT: 2 8 0.67 vroundps $1, %xmm0, %xmm2
-# CHECK-NEXT: 3 14 0.67 * vroundps $1, (%rax), %xmm2
-# CHECK-NEXT: 2 8 0.67 vroundps $1, %ymm0, %ymm2
-# CHECK-NEXT: 3 15 0.67 * vroundps $1, (%rax), %ymm2
-# CHECK-NEXT: 2 8 0.67 vroundsd $1, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 3 14 0.67 * vroundsd $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 8 0.67 vroundss $1, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 3 14 0.67 * vroundss $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 vroundpd $1, %xmm0, %xmm2
+# CHECK-NEXT: 3 14 1.00 * vroundpd $1, (%rax), %xmm2
+# CHECK-NEXT: 2 8 1.00 vroundpd $1, %ymm0, %ymm2
+# CHECK-NEXT: 3 15 1.00 * vroundpd $1, (%rax), %ymm2
+# CHECK-NEXT: 2 8 1.00 vroundps $1, %xmm0, %xmm2
+# CHECK-NEXT: 3 14 1.00 * vroundps $1, (%rax), %xmm2
+# CHECK-NEXT: 2 8 1.00 vroundps $1, %ymm0, %ymm2
+# CHECK-NEXT: 3 15 1.00 * vroundps $1, (%rax), %ymm2
+# CHECK-NEXT: 2 8 1.00 vroundsd $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 14 1.00 * vroundsd $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 14 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: 2 10 1.00 * vrsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 vrsqrtps %ymm0, %ymm2
@@ -1651,17 +1651,17 @@ vzeroupper
# CHECK-NEXT: 1 12 3.00 vsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 17 3.00 * vsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 2 1.00 * * * vstmxcsr (%rax)
-# CHECK-NEXT: 1 4 0.33 vsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vsubpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vsubpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vsubps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vsubps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vsubsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vsubsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vsubsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vsubss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vsubss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vsubss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vtestpd %xmm0, %xmm1
# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1
@@ -1716,30 +1716,30 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 123.00 271.00 170.00 171.17 171.17 34.00 376.00 5.00 12.67
+# CHECK-NEXT: - 123.00 290.83 198.83 171.17 171.17 34.00 327.33 5.00 12.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 1.00 - - - - - - - vaesdec %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vaesdec (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vaesdeclast %xmm0, %xmm1, %xmm2
@@ -1788,50 +1788,50 @@ vzeroupper
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastss (%rax), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastss (%rax), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmppd $0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmppd $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmppd $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmppd $0, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpps $0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpps $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpps $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpps $0, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpsd $0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpsd $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpss $0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpss $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmppd $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmppd $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmppd $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmppd $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpps $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpps $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpps $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpps $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpsd $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpsd $0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpss $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpss $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vcomisd %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomisd (%rax), %xmm1
# CHECK-NEXT: - - 1.00 - - - - - - - vcomiss %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomiss (%rax), %xmm1
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtdq2ps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtdq2ps %ymm0, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtps2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtps2dq %ymm0, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %ymm2
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtsd2si %xmm0, %ecx
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtsd2si %xmm0, %rcx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtsd2si (%rax), %ecx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtsd2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsd2ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtsd2ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sdl %ecx, %xmm0, %xmm2
@@ -1844,26 +1844,26 @@ vzeroupper
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssq (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtss2sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtss2si %xmm0, %ecx
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtss2si %xmm0, %rcx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtss2si (%rax), %ecx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtss2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtss2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvtss2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvttps2dq %ymm0, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %ymm2
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvttsd2si %xmm0, %ecx
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvttsd2si %xmm0, %rcx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttsd2si (%rax), %ecx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttsd2si (%rax), %rcx
-# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvttss2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttsd2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttsd2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttsd2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttsd2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvttss2si %xmm0, %rcx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttss2si (%rax), %ecx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttss2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttss2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttss2si (%rax), %rcx
# CHECK-NEXT: - 3.00 1.00 - - - - - - - vdivpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - vdivpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - 5.00 1.00 - - - - - - - vdivpd %ymm0, %ymm1, %ymm2
@@ -1918,30 +1918,30 @@ vzeroupper
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaskmovps (%rax), %ymm0, %ymm2
# CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vmaskmovps %xmm0, %xmm1, (%rax)
# CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vmaskmovps %ymm0, %ymm1, (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxsd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminsd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovapd (%rax), %xmm2
@@ -2029,18 +2029,18 @@ vzeroupper
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovups (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 2.00 - - vmpsadbw $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmpsadbw $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulsd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vorpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vorpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vorpd %ymm0, %ymm1, %ymm2
@@ -2143,8 +2143,8 @@ vzeroupper
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vphaddsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphaddw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphaddw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vphminposuw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vphminposuw (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - - - vphminposuw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vphminposuw (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphsubd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphsubd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vphsubsw %xmm0, %xmm1, %xmm2
@@ -2159,10 +2159,10 @@ vzeroupper
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpinsrq $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 2.00 - - vpinsrw $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpinsrw $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddubsw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddubsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddwd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddwd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddubsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddubsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddwd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddwd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaxsb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaxsb (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaxsd %xmm0, %xmm1, %xmm2
@@ -2212,20 +2212,20 @@ vzeroupper
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxwq %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuldq %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuldq (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhrsw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhrsw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhuw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhuw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vpmulld %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vpmulld (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmullw %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmullw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuludq %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuludq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuldq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuldq (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhrsw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhrsw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhuw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhuw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmullw %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmullw (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuludq %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuludq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpor %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpor (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpsadbw %xmm0, %xmm1, %xmm2
@@ -2314,18 +2314,18 @@ vzeroupper
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpps (%rax), %ymm2
# CHECK-NEXT: - - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundpd $1, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundpd $1, (%rax), %xmm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundpd $1, %ymm0, %ymm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundpd $1, (%rax), %ymm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundps $1, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundps $1, (%rax), %xmm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundps $1, %ymm0, %ymm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundps $1, (%rax), %ymm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundsd $1, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundsd $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundss $1, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundss $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundpd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundpd $1, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundpd $1, %ymm0, %ymm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundpd $1, (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundps $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundps $1, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundps $1, %ymm0, %ymm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundps $1, (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundsd $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrsqrtps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vrsqrtps %ymm0, %ymm2
@@ -2353,18 +2353,18 @@ vzeroupper
# CHECK-NEXT: - 3.00 1.00 - - - - - - - vsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - vsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vstmxcsr (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubpd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubpd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubsd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubpd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubpd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vtestpd %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vtestpd (%rax), %xmm1
# CHECK-NEXT: - - 1.00 - - - - - - - vtestpd %ymm0, %ymm1
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s Mon Jun 11 07:37:53 2018
@@ -583,9 +583,9 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 4 10 2.00 * vphsubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 3 3 2.00 vphsubw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 4 10 2.00 * vphsubw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vpmaddubsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vpmaddubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmaddubsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vpmaddwd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vpmaddwd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmaddwd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 7 0.50 * vpmaskmovd (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 8 0.50 * vpmaskmovd (%rax), %ymm0, %ymm2
@@ -644,19 +644,19 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 9 1.00 * vpmovzxwd (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vpmovzxwq %xmm0, %ymm2
# CHECK-NEXT: 2 10 1.00 * vpmovzxwq (%rax), %ymm2
-# CHECK-NEXT: 1 4 0.33 vpmuldq %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vpmuldq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmuldq (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vpmulhrsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vpmulhrsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmulhrsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vpmulhuw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vpmulhuw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmulhuw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vpmulhw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vpmulhw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmulhw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 2 10 0.67 vpmulld %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 3 17 0.67 * vpmulld (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vpmullw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 10 1.00 vpmulld %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 3 17 1.00 * vpmulld (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vpmullw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmullw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vpmuludq %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vpmuludq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmuludq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.33 vpor %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpor (%rax), %ymm1, %ymm2
@@ -771,7 +771,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 107.00 86.00 85.17 85.17 1.00 169.00 - 1.67
+# CHECK-NEXT: - - 110.33 89.33 85.17 85.17 1.00 162.33 - 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -901,10 +901,10 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vphsubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphsubw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphsubw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddubsw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddubsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddwd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddwd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddubsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddubsw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddwd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddwd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaskmovd (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaskmovd (%rax), %ymm0, %ymm2
# CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vpmaskmovd %xmm0, %xmm1, (%rax)
@@ -962,20 +962,20 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwd (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxwq %xmm0, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwq (%rax), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuldq %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuldq (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhrsw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhrsw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhuw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhuw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vpmulld %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vpmulld (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmullw %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmullw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuludq %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuludq (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuldq %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuldq (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhrsw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhrsw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhuw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhuw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - vpmulld %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmullw %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmullw (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuludq %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuludq (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpor %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpsadbw %ymm0, %ymm1, %ymm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s Mon Jun 11 07:37:53 2018
@@ -45,15 +45,15 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 2.67 2.67 1.67 1.67 2.00 8.67 - 0.67
+# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtph2ps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtph2ps (%rax), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.33 0.33 1.00 1.33 - 0.33 vcvtps2ph $0, %xmm0, (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, %ymm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.33 0.33 1.00 1.33 - 0.33 vcvtps2ph $0, %ymm0, (%rax)
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax)
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %ymm0, (%rax)
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s Mon Jun 11 07:37:53 2018
@@ -298,197 +298,197 @@ vfnmsub231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 4 0.33 vfmadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmadd132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd132sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmadd213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd213sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmadd231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd231sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmadd132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd132ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmadd213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd213ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmadd231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd231ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmaddsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmaddsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsub132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub132sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsub213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub213sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsub231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub231sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsub132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub132ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsub213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub213ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsub231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub231ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfmsubadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfmsubadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd132sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd213sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd231sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd132ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd213ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmadd231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd231ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub132sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub213sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub231sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub132ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub213ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 4 0.33 vfnmsub231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 4 0.50 vfnmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub231ss (%rax), %xmm1, %xmm2
# CHECK: Resources:
@@ -505,199 +505,199 @@ vfnmsub231ss (%rax), %xmm1, %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 64.00 64.00 48.00 48.00 - 64.00 - -
+# CHECK-NEXT: - - 96.00 96.00 48.00 48.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231pd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231pd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231pd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231pd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231sd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231sd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231pd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231pd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231pd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231pd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231sd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ss (%rax), %xmm1, %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s Mon Jun 11 07:37:53 2018
@@ -194,17 +194,17 @@ xorps (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 4 0.33 addps %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 addps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * addps (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 addss %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 addss %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * addss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 andnps %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * andnps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 andps %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * andps (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 cmpps $0, %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 cmpps $0, %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cmpps $0, (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 cmpss $0, %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 cmpss $0, %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * cmpss $0, (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 comiss %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * comiss (%rax), %xmm1
@@ -217,12 +217,12 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %ecx
-# CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %rcx
+# CHECK-NEXT: 3 7 1.00 cvtss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %ecx
# CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %rcx
# CHECK-NEXT: 2 5 1.00 cvttps2pi %xmm0, %mm2
# CHECK-NEXT: 2 9 0.50 * cvttps2pi (%rax), %mm2
-# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %ecx
+# CHECK-NEXT: 2 6 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %ecx
# CHECK-NEXT: 4 12 1.00 * cvttss2si (%rax), %rcx
@@ -232,13 +232,13 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 2 16 3.00 * divss (%rax), %xmm2
# CHECK-NEXT: 3 7 1.00 * * * ldmxcsr (%rax)
# CHECK-NEXT: 1 1 1.00 * * * maskmovq %mm0, %mm1
-# CHECK-NEXT: 1 4 0.33 maxps %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 maxps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * maxps (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 maxss %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 maxss %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * maxss (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 minps %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 minps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * minps (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 minss %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 minss %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * minss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 movaps %xmm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * movaps %xmm0, (%rax)
@@ -258,9 +258,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 movups %xmm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * movups %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movups (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 mulps %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 mulps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * mulps (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 mulss %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 mulss %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * mulss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 orps %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * orps (%rax), %xmm2
@@ -306,9 +306,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 12 3.00 sqrtss %xmm0, %xmm2
# CHECK-NEXT: 2 17 3.00 * sqrtss (%rax), %xmm2
# CHECK-NEXT: 3 2 1.00 * * * stmxcsr (%rax)
-# CHECK-NEXT: 1 4 0.33 subps %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 subps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * subps (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 subss %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 subss %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * subss (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 ucomiss %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * ucomiss (%rax), %xmm1
@@ -333,25 +333,25 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 26.00 65.50 18.50 32.00 32.00 8.00 45.50 0.50 3.00
+# CHECK-NEXT: - 26.00 65.83 25.83 32.00 32.00 8.00 37.83 0.50 3.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addss %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addss (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - addps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - addss %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addss (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andnps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andnps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpps $0, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpps $0, (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpss $0, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpss $0, (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpps $0, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpps $0, (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpss $0, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpss $0, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - comiss %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comiss (%rax), %xmm1
-# CHECK-NEXT: - - 2.00 - - - - - - - cvtpi2ps %mm0, %xmm2
+# CHECK-NEXT: - - - - - - - 2.00 - - cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2pi (%rax), %mm2
@@ -359,15 +359,15 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - cvtsi2ssq %rcx, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtss2si %xmm0, %ecx
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtss2si %xmm0, %rcx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtss2si (%rax), %ecx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtss2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtss2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvtss2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtss2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtss2si (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttps2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvttps2pi (%rax), %mm2
-# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvttss2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvttss2si %xmm0, %rcx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvttss2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttss2si (%rax), %ecx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 1.33 - - cvttss2si (%rax), %rcx
# CHECK-NEXT: - 3.00 1.00 - - - - - - - divps %xmm0, %xmm2
# CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - divps (%rax), %xmm2
@@ -375,14 +375,14 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - divss (%rax), %xmm2
# CHECK-NEXT: - - 1.25 0.25 0.50 0.50 - 0.25 0.25 - ldmxcsr (%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - maskmovq %mm0, %mm1
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxss %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxss (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minss %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minss (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxss %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxss (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - minps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - minss %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minss (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movaps %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movaps %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movaps (%rax), %xmm2
@@ -401,10 +401,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movups %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movups %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movups (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulss %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulss (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulss %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulss (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - orps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - orps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - pavgb %mm0, %mm2
@@ -449,10 +449,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - 3.00 1.00 - - - - - - - sqrtss %xmm0, %xmm2
# CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - sqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 stmxcsr (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subss %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subss (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - subps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - subss %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subss (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - ucomiss %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - ucomiss (%rax), %xmm1
# CHECK-NEXT: - - - - - - - 1.00 - - unpckhps %xmm0, %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s Mon Jun 11 07:37:53 2018
@@ -402,24 +402,24 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 4 0.33 addpd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 addpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * addpd (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 addsd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 addsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * addsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 andnpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * andnpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 andpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * andpd (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 * * * clflush (%rax)
-# CHECK-NEXT: 1 4 0.33 cmppd $0, %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 cmppd $0, %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cmppd $0, (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 cmpsd $0, %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 cmpsd $0, %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * cmpsd $0, (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 comisd %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * comisd (%rax), %xmm1
-# CHECK-NEXT: 2 5 1.00 cvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 3 11 1.00 * cvtdq2pd (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 cvtdq2ps %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 11 1.00 * cvtpd2dq (%rax), %xmm2
@@ -427,9 +427,9 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 3 11 1.00 * cvtpd2pi (%rax), %mm2
# CHECK-NEXT: 2 5 1.00 cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 3 11 1.00 * cvtpd2ps (%rax), %xmm2
-# CHECK-NEXT: 2 5 1.00 cvtpi2pd %mm0, %xmm2
-# CHECK-NEXT: 3 10 1.00 * cvtpi2pd (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 cvtps2dq %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 cvtpi2pd %mm0, %xmm2
+# CHECK-NEXT: 2 9 0.50 * cvtpi2pd (%rax), %xmm2
+# CHECK-NEXT: 1 4 0.50 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cvtps2dq (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * cvtps2pd (%rax), %xmm2
@@ -449,7 +449,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 3 11 1.00 * cvttpd2dq (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvttpd2pi %xmm0, %mm2
# CHECK-NEXT: 3 11 1.00 * cvttpd2pi (%rax), %mm2
-# CHECK-NEXT: 1 4 0.33 cvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cvttps2dq (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 cvttsd2si %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 cvttsd2si %xmm0, %rcx
@@ -461,13 +461,13 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 19 4.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 2 2 0.50 * * * lfence
# CHECK-NEXT: 2 1 1.00 * * * maskmovdqu %xmm0, %xmm1
-# CHECK-NEXT: 1 4 0.33 maxpd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 maxpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * maxpd (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 maxsd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 maxsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * maxsd (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 minpd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 minpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * minpd (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 minsd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 minsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * minsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 movapd %xmm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * movapd %xmm0, (%rax)
@@ -504,9 +504,9 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 movupd %xmm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * movupd %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movupd (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 mulpd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 mulpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * mulpd (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 mulsd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 mulsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * mulsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 orpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * orpd (%rax), %xmm2
@@ -555,7 +555,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpgtw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pcmpgtw (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 pextrw $1, %xmm0, %ecx
-# CHECK-NEXT: 1 4 0.33 pmaddwd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 pmaddwd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmaddwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmaxsw (%rax), %xmm2
@@ -566,15 +566,15 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pminub (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 pmovmskb %xmm0, %ecx
-# CHECK-NEXT: 1 4 0.33 pmulhuw %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 pmulhuw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmulhuw (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 pmulhw %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 pmulhw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmulhw (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 pmullw %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 pmullw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmullw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmuludq %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pmuludq (%rax), %mm2
-# CHECK-NEXT: 1 4 0.33 pmuludq %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 pmuludq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmuludq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 por %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * por (%rax), %xmm2
@@ -654,9 +654,9 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 24 6.00 * sqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 18 6.00 sqrtsd %xmm0, %xmm2
# CHECK-NEXT: 2 23 6.00 * sqrtsd (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 subpd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 subpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * subpd (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 subsd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 subsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * subsd (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 ucomisd %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * ucomisd (%rax), %xmm1
@@ -681,28 +681,28 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 38.00 101.08 69.08 62.67 62.67 14.00 112.08 1.75 4.67
+# CHECK-NEXT: - 38.00 103.08 82.08 62.67 62.67 14.00 94.08 1.75 4.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - addpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andnpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andnpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andpd (%rax), %xmm2
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 1.25 - clflush (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmppd $0, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmppd $0, (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpsd $0, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpsd $0, (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmppd $0, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmppd $0, (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpsd $0, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpsd $0, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - comisd %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comisd (%rax), %xmm1
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvtdq2ps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2dq (%rax), %xmm2
@@ -710,16 +710,16 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2pi (%rax), %mm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2ps (%rax), %xmm2
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtpi2pd %mm0, %xmm2
-# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvtps2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtpi2pd %mm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2pd (%rax), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtsd2si %xmm0, %ecx
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtsd2si %xmm0, %rcx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtsd2si (%rax), %ecx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtsd2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtsd2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtsd2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsd2ss %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtsd2ss (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sdl %ecx, %xmm2
@@ -732,26 +732,26 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvttpd2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvttpd2pi (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvttps2dq (%rax), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvttsd2si %xmm0, %ecx
-# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvttsd2si %xmm0, %rcx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvttsd2si (%rax), %ecx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvttsd2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttsd2si %xmm0, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttsd2si %xmm0, %rcx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttsd2si (%rax), %ecx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttsd2si (%rax), %rcx
# CHECK-NEXT: - 3.00 1.00 - - - - - - - divpd %xmm0, %xmm2
# CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - divpd (%rax), %xmm2
# CHECK-NEXT: - 3.00 1.00 - - - - - - - divsd %xmm0, %xmm2
# CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - divsd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - lfence
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 maskmovdqu %xmm0, %xmm1
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxsd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxsd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minsd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minsd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxsd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxsd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - minpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - minsd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minsd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movapd %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movapd %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movapd (%rax), %xmm2
@@ -787,10 +787,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movupd %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movupd %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movupd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulsd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulsd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulsd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulsd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - orpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - orpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - packssdw %xmm0, %xmm2
@@ -838,8 +838,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pcmpgtw %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pcmpgtw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - pextrw $1, %xmm0, %ecx
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmaddwd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmaddwd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaddwd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaddwd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaxsw %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaxsw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaxub %xmm0, %xmm2
@@ -849,16 +849,16 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pminub %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pminub (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - pmovmskb %xmm0, %ecx
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhuw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmulhuw (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmulhw (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmullw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmullw (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhuw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhuw (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhw (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmullw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmullw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - pmuludq %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmuludq (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmuludq %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmuludq (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmuludq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmuludq (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - por %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - por (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - psadbw %xmm0, %xmm2
@@ -937,10 +937,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - 6.00 1.00 - 0.50 0.50 - - - - sqrtpd (%rax), %xmm2
# CHECK-NEXT: - 6.00 1.00 - - - - - - - sqrtsd %xmm0, %xmm2
# CHECK-NEXT: - 6.00 1.00 - 0.50 0.50 - - - - sqrtsd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subsd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subsd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - subpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - subsd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subsd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - ucomisd %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - ucomisd (%rax), %xmm1
# CHECK-NEXT: - - - - - - - 1.00 - - unpckhpd %xmm0, %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s Mon Jun 11 07:37:53 2018
@@ -39,9 +39,9 @@ movsldup (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 4 0.33 addsubpd %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 addsubpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * addsubpd (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 addsubps %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 addsubps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * addsubps (%rax), %xmm2
# CHECK-NEXT: 3 6 2.00 haddpd %xmm0, %xmm2
# CHECK-NEXT: 4 12 2.00 * haddpd (%rax), %xmm2
@@ -73,14 +73,14 @@ movsldup (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 4.00 4.00 5.00 5.00 - 23.00 - -
+# CHECK-NEXT: - - 4.67 4.67 5.00 5.00 - 21.67 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsubpd %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsubpd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsubps %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsubps (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsubpd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubpd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsubps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - haddpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddps %xmm0, %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s Mon Jun 11 07:37:53 2018
@@ -189,8 +189,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 pextrq $1, %xmm0, %rcx
# CHECK-NEXT: 3 2 1.00 * pextrq $1, %xmm0, (%rax)
# CHECK-NEXT: 3 2 1.00 * pextrw $1, %xmm0, (%rax)
-# CHECK-NEXT: 1 4 0.33 phminposuw %xmm0, %xmm2
-# CHECK-NEXT: 2 10 0.50 * phminposuw (%rax), %xmm2
+# CHECK-NEXT: 1 4 1.00 phminposuw %xmm0, %xmm2
+# CHECK-NEXT: 2 10 1.00 * phminposuw (%rax), %xmm2
# CHECK-NEXT: 2 2 2.00 pinsrb $1, %eax, %xmm1
# CHECK-NEXT: 2 6 1.00 * pinsrb $1, (%rax), %xmm1
# CHECK-NEXT: 2 2 2.00 pinsrd $1, %eax, %xmm1
@@ -237,20 +237,20 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 * pmovzxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pmovzxwq %xmm0, %xmm2
# CHECK-NEXT: 2 6 1.00 * pmovzxwq (%rax), %xmm2
-# CHECK-NEXT: 1 4 0.33 pmuldq %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 pmuldq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmuldq (%rax), %xmm2
-# CHECK-NEXT: 2 10 0.67 pmulld %xmm0, %xmm2
-# CHECK-NEXT: 3 16 0.67 * pmulld (%rax), %xmm2
+# CHECK-NEXT: 2 10 1.00 pmulld %xmm0, %xmm2
+# CHECK-NEXT: 3 16 1.00 * pmulld (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 ptest %xmm0, %xmm1
# CHECK-NEXT: 3 9 1.00 * ptest (%rax), %xmm1
-# CHECK-NEXT: 2 8 0.67 roundpd $1, %xmm0, %xmm2
-# CHECK-NEXT: 3 14 0.67 * roundpd $1, (%rax), %xmm2
-# CHECK-NEXT: 2 8 0.67 roundps $1, %xmm0, %xmm2
-# CHECK-NEXT: 3 14 0.67 * roundps $1, (%rax), %xmm2
-# CHECK-NEXT: 2 8 0.67 roundsd $1, %xmm0, %xmm2
-# CHECK-NEXT: 3 14 0.67 * roundsd $1, (%rax), %xmm2
-# CHECK-NEXT: 2 8 0.67 roundss $1, %xmm0, %xmm2
-# CHECK-NEXT: 3 14 0.67 * roundss $1, (%rax), %xmm2
+# CHECK-NEXT: 2 8 1.00 roundpd $1, %xmm0, %xmm2
+# CHECK-NEXT: 3 14 1.00 * roundpd $1, (%rax), %xmm2
+# CHECK-NEXT: 2 8 1.00 roundps $1, %xmm0, %xmm2
+# CHECK-NEXT: 3 14 1.00 * roundps $1, (%rax), %xmm2
+# CHECK-NEXT: 2 8 1.00 roundsd $1, %xmm0, %xmm2
+# CHECK-NEXT: 3 14 1.00 * roundsd $1, (%rax), %xmm2
+# CHECK-NEXT: 2 8 1.00 roundss $1, %xmm0, %xmm2
+# CHECK-NEXT: 3 14 1.00 * roundss $1, (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SKXDivider
@@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 31.67 25.67 23.67 23.67 5.00 74.67 - 1.67
+# CHECK-NEXT: - - 36.67 28.67 23.67 23.67 5.00 66.67 - 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -304,8 +304,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - pextrq $1, %xmm0, %rcx
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 pextrq $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 pextrw $1, %xmm0, (%rax)
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - phminposuw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - phminposuw (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - - - - phminposuw %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - phminposuw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 2.00 - - pinsrb $1, %eax, %xmm1
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pinsrb $1, (%rax), %xmm1
# CHECK-NEXT: - - - - - - - 2.00 - - pinsrd $1, %eax, %xmm1
@@ -352,17 +352,17 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pmovzxwd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - pmovzxwq %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pmovzxwq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmuldq %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmuldq (%rax), %xmm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - pmulld %xmm0, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - pmulld (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmuldq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmuldq (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - pmulld %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - pmulld (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - ptest %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - ptest (%rax), %xmm1
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundpd $1, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundpd $1, (%rax), %xmm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundps $1, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundps $1, (%rax), %xmm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundsd $1, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundsd $1, (%rax), %xmm2
-# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundss $1, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundss $1, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundpd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundpd $1, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundps $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundps $1, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundsd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundsd $1, (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundss $1, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundss $1, (%rax), %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s Mon Jun 11 07:37:53 2018
@@ -148,11 +148,11 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 4 9 2.00 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pmaddubsw (%rax), %mm2
-# CHECK-NEXT: 1 4 0.33 pmaddubsw %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 pmaddubsw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmaddubsw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmulhrsw %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pmulhrsw (%rax), %mm2
-# CHECK-NEXT: 1 4 0.33 pmulhrsw %xmm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 pmulhrsw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmulhrsw (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pshufb %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * pshufb (%rax), %mm2
@@ -185,7 +185,7 @@ psignw (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 30.00 12.00 16.00 16.00 - 70.00 - -
+# CHECK-NEXT: - - 30.67 12.67 16.00 16.00 - 68.67 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -231,12 +231,12 @@ psignw (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - phsubw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - pmaddubsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmaddubsw (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmaddubsw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmaddubsw (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaddubsw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaddubsw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - pmulhrsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmulhrsw (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhrsw %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmulhrsw (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhrsw %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhrsw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - pshufb %mm0, %mm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pshufb (%rax), %mm2
# CHECK-NEXT: - - - - - - - 1.00 - - pshufb %xmm0, %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s?rev=334407&r1=334406&r2=334407&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s Mon Jun 11 07:37:53 2018
@@ -822,13 +822,13 @@ xorq (%rax), %rdi
# CHECK-NEXT: 3 7 1.00 * * negl (%rax)
# CHECK-NEXT: 1 1 0.25 negq %rcx
# CHECK-NEXT: 3 7 1.00 * * negq (%r10)
-# CHECK-NEXT: 1 1 0.25 nop
-# CHECK-NEXT: 1 1 0.25 nopw %di
-# CHECK-NEXT: 1 1 0.25 nopw (%rcx)
-# CHECK-NEXT: 1 1 0.25 nopl %esi
-# CHECK-NEXT: 1 1 0.25 nopl (%r8)
-# CHECK-NEXT: 1 1 0.25 nopq %rdx
-# CHECK-NEXT: 1 1 0.25 nopq (%r9)
+# CHECK-NEXT: 1 1 0.17 nop
+# CHECK-NEXT: 1 1 0.17 nopw %di
+# CHECK-NEXT: 1 1 0.17 nopw (%rcx)
+# CHECK-NEXT: 1 1 0.17 nopl %esi
+# CHECK-NEXT: 1 1 0.17 nopl (%r8)
+# CHECK-NEXT: 1 1 0.17 nopq %rdx
+# CHECK-NEXT: 1 1 0.17 nopq (%r9)
# CHECK-NEXT: 1 1 0.25 notb %dil
# CHECK-NEXT: 3 7 1.00 * * notb (%r8)
# CHECK-NEXT: 1 1 0.25 notw %si
@@ -1164,7 +1164,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: 60.00 - 431.50 225.50 202.00 202.00 167.00 186.00 416.00 69.00
+# CHECK-NEXT: 60.00 - 429.75 223.75 202.00 202.00 167.00 184.25 414.25 69.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1381,13 +1381,13 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 negl (%rax)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - negq %rcx
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 negq (%r10)
-# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nop
-# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopw %di
-# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopw (%rcx)
-# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopl %esi
-# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopl (%r8)
-# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopq %rdx
-# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopq (%r9)
+# CHECK-NEXT: - - - - - - - - - - nop
+# CHECK-NEXT: - - - - - - - - - - nopw %di
+# CHECK-NEXT: - - - - - - - - - - nopw (%rcx)
+# CHECK-NEXT: - - - - - - - - - - nopl %esi
+# CHECK-NEXT: - - - - - - - - - - nopl (%r8)
+# CHECK-NEXT: - - - - - - - - - - nopq %rdx
+# CHECK-NEXT: - - - - - - - - - - nopq (%r9)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - notb %dil
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 notb (%r8)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - notw %si
More information about the llvm-commits
mailing list