[llvm] r331672 - [X86] Split WriteFAdd/WriteFCmp/WriteFMul schedule classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon May 7 13:52:53 PDT 2018
Author: rksimon
Date: Mon May 7 13:52:53 2018
New Revision: 331672
URL: http://llvm.org/viewvc/llvm-project?rev=331672&view=rev
Log:
[X86] Split WriteFAdd/WriteFCmp/WriteFMul schedule classes
Split to support single/double for scalar, XMM and YMM/ZMM instructions, removing the InstRW overrides for these instructions.
Fixes the scheduling of the Atom ADDSUBPD instruction and reclassifies VFPCLASS as WriteFCmp, which is closer in behaviour.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
llvm/trunk/test/tools/llvm-mca/X86/Atom/resources-sse2.s
llvm/trunk/test/tools/llvm-mca/X86/Atom/resources-sse3.s
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon May 7 13:52:53 2018
@@ -2672,9 +2672,8 @@ multiclass avx512_fp_fpclass_all<string
EVEX_CD8<64, CD8VT1>, VEX_W;
}
-// FIXME: Is there a better scheduler class for VFPCLASS?
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
- X86Vfpclasss, SchedWriteFAdd, HasDQI>,
+ X86Vfpclasss, SchedWriteFCmp, HasDQI>,
AVX512AIi8Base, EVEX;
//-----------------------------------------------------------------
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon May 7 13:52:53 2018
@@ -1854,12 +1854,12 @@ let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmp.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG;
+ SchedWriteFCmpSizes.PS.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmp.Scl>, // same latency as 32 bit compare
+ SchedWriteFCmpSizes.PD.Scl>,
XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
@@ -1867,12 +1867,12 @@ let Constraints = "$src1 = $dst" in {
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
"cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmp.Scl>, XS;
+ SchedWriteFCmpSizes.PS.Scl>, XS;
let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmp.Scl>, XD;
+ SchedWriteFCmpSizes.PD.Scl>, XD;
}
multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
@@ -1896,21 +1896,21 @@ let isCodeGenOnly = 1 in {
let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- SchedWriteFCmp.Scl, sse_load_f32>, XS, VEX_4V;
+ SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS, VEX_4V;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- SchedWriteFCmp.Scl, sse_load_f64>, // same latency as f32
+ SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
- SchedWriteFCmp.Scl, sse_load_f32>, XS;
+ SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
- SchedWriteFCmp.Scl, sse_load_f64>, XD;
+ SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
}
}
@@ -2030,28 +2030,28 @@ multiclass sse12_cmp_packed<RegisterClas
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, v4f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmp.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
+ SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, v2f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmp.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
+ SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, v8f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmp.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, v4f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmp.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, v4f32,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmp.XMM, SSEPackedSingle, memopv4f32>, PS;
+ SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, v2f64,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmp.XMM, SSEPackedDouble, memopv2f64>, PD;
+ SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
}
def CommutableCMPCC : PatLeaf<(imm), [{
@@ -4438,28 +4438,28 @@ multiclass sse3_addsub<string OpcodeStr,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
- SchedWriteFAdd.XMM, loadv4f32, 0>,
+ SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>,
XD, VEX_4V, VEX_WIG;
defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
- SchedWriteFAdd.YMM, loadv8f32, 0>,
+ SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>,
XD, VEX_4V, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
- SchedWriteFAdd.XMM, loadv2f64, 0>,
+ SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>,
PD, VEX_4V, VEX_WIG;
defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
- SchedWriteFAdd.YMM, loadv4f64, 0>,
+ SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>,
PD, VEX_4V, VEX_L, VEX_WIG;
}
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
let ExeDomain = SSEPackedSingle in
defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
- SchedWriteFAdd.XMM, memopv4f32>, XD;
+ SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD;
let ExeDomain = SSEPackedDouble in
defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
- SchedWriteFAdd.XMM, memopv2f64>, PD;
+ SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD;
}
//===---------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Mon May 7 13:52:53 2018
@@ -155,13 +155,28 @@ def : WriteRes<WriteFLoad, [BWPo
def : WriteRes<WriteFStore, [BWPort237, BWPort4]>;
def : WriteRes<WriteFMove, [BWPort5]>;
-defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
-defm : BWWriteResPair<WriteFAddY, [BWPort1], 3, [1], 1, 6>; // Floating point add/sub (YMM/ZMM).
-defm : BWWriteResPair<WriteFCmp, [BWPort1], 3, [1], 1, 5>; // Floating point compare.
-defm : BWWriteResPair<WriteFCmpY, [BWPort1], 3, [1], 1, 6>; // Floating point compare (YMM/ZMM).
-defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
-defm : BWWriteResPair<WriteFMul, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication.
-defm : BWWriteResPair<WriteFMulY, [BWPort01], 3, [1], 1, 6>; // Floating point multiplication (YMM/ZMM).
+defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
+defm : BWWriteResPair<WriteFAddX, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub (XMM).
+defm : BWWriteResPair<WriteFAddY, [BWPort1], 3, [1], 1, 6>; // Floating point add/sub (YMM/ZMM).
+defm : BWWriteResPair<WriteFAdd64, [BWPort1], 3, [1], 1, 5>; // Floating point double add/sub.
+defm : BWWriteResPair<WriteFAdd64X, [BWPort1], 3, [1], 1, 5>; // Floating point double add/sub (XMM).
+defm : BWWriteResPair<WriteFAdd64Y, [BWPort1], 3, [1], 1, 6>; // Floating point double add/sub (YMM/ZMM).
+
+defm : BWWriteResPair<WriteFCmp, [BWPort1], 3, [1], 1, 5>; // Floating point compare.
+defm : BWWriteResPair<WriteFCmpX, [BWPort1], 3, [1], 1, 5>; // Floating point compare (XMM).
+defm : BWWriteResPair<WriteFCmpY, [BWPort1], 3, [1], 1, 6>; // Floating point compare (YMM/ZMM).
+defm : BWWriteResPair<WriteFCmp64, [BWPort1], 3, [1], 1, 5>; // Floating point double compare.
+defm : BWWriteResPair<WriteFCmp64X, [BWPort1], 3, [1], 1, 5>; // Floating point double compare (XMM).
+defm : BWWriteResPair<WriteFCmp64Y, [BWPort1], 3, [1], 1, 6>; // Floating point double compare (YMM/ZMM).
+
+defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
+
+defm : BWWriteResPair<WriteFMul, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication.
+defm : BWWriteResPair<WriteFMulX, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication (XMM).
+defm : BWWriteResPair<WriteFMulY, [BWPort01], 3, [1], 1, 6>; // Floating point multiplication (YMM/ZMM).
+defm : BWWriteResPair<WriteFMul64, [BWPort01], 3, [1], 1, 5>; // Floating point double multiplication.
+defm : BWWriteResPair<WriteFMul64X, [BWPort01], 3, [1], 1, 5>; // Floating point double multiplication (XMM).
+defm : BWWriteResPair<WriteFMul64Y, [BWPort01], 3, [1], 1, 6>; // Floating point double multiplication (YMM/ZMM).
//defm : BWWriteResPair<WriteFDiv, [BWPort0,BWFPDivider], 11, [1,3], 1, 5>; // Floating point division.
defm : BWWriteResPair<WriteFDivX, [BWPort0,BWFPDivider], 11, [1,5], 1, 5>; // Floating point division (XMM).
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Mon May 7 13:52:53 2018
@@ -149,13 +149,28 @@ def : WriteRes<WriteFLoad, [HWPo
def : WriteRes<WriteFMove, [HWPort5]>;
defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>;
-defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 6>;
-defm : HWWriteResPair<WriteFAddY, [HWPort1], 3, [1], 1, 7>;
-defm : HWWriteResPair<WriteFCmp, [HWPort1], 3, [1], 1, 6>;
-defm : HWWriteResPair<WriteFCmpY, [HWPort1], 3, [1], 1, 7>;
-defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
-defm : HWWriteResPair<WriteFMul, [HWPort01], 5, [1], 1, 6>;
-defm : HWWriteResPair<WriteFMulY, [HWPort01], 5, [1], 1, 7>;
+defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>;
+defm : HWWriteResPair<WriteFAddX, [HWPort1], 3, [1], 1, 6>;
+defm : HWWriteResPair<WriteFAddY, [HWPort1], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteFAdd64, [HWPort1], 3, [1], 1, 5>;
+defm : HWWriteResPair<WriteFAdd64X, [HWPort1], 3, [1], 1, 6>;
+defm : HWWriteResPair<WriteFAdd64Y, [HWPort1], 3, [1], 1, 7>;
+
+defm : HWWriteResPair<WriteFCmp, [HWPort1], 3, [1], 1, 5>;
+defm : HWWriteResPair<WriteFCmpX, [HWPort1], 3, [1], 1, 6>;
+defm : HWWriteResPair<WriteFCmpY, [HWPort1], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteFCmp64, [HWPort1], 3, [1], 1, 5>;
+defm : HWWriteResPair<WriteFCmp64X, [HWPort1], 3, [1], 1, 6>;
+defm : HWWriteResPair<WriteFCmp64Y, [HWPort1], 3, [1], 1, 7>;
+
+defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
+
+defm : HWWriteResPair<WriteFMul, [HWPort01], 5, [1], 1, 5>;
+defm : HWWriteResPair<WriteFMulX, [HWPort01], 5, [1], 1, 6>;
+defm : HWWriteResPair<WriteFMulY, [HWPort01], 5, [1], 1, 7>;
+defm : HWWriteResPair<WriteFMul64, [HWPort01], 5, [1], 1, 5>;
+defm : HWWriteResPair<WriteFMul64X, [HWPort01], 5, [1], 1, 6>;
+defm : HWWriteResPair<WriteFMul64Y, [HWPort01], 5, [1], 1, 7>;
defm : HWWriteResPair<WriteFDiv, [HWPort0,HWFPDivider], 13, [1,7], 1, 5>;
defm : HWWriteResPair<WriteFDivX, [HWPort0,HWFPDivider], 13, [1,7], 1, 6>;
@@ -882,17 +897,7 @@ def HWWriteResGroup12 : SchedWriteRes<[H
}
def: InstRW<[HWWriteResGroup12], (instregex "MMX_CVTPI2PSirm",
"PDEP(32|64)rm",
- "PEXT(32|64)rm",
- "(V?)ADDSDrm",
- "(V?)ADDSSrm",
- "(V?)CMPSDrm",
- "(V?)CMPSSrm",
- "(V?)MAX(C?)SDrm",
- "(V?)MAX(C?)SSrm",
- "(V?)MIN(C?)SDrm",
- "(V?)MIN(C?)SSrm",
- "(V?)SUBSDrm",
- "(V?)SUBSSrm")>;
+ "PEXT(32|64)rm")>;
def HWWriteResGroup12_1 : SchedWriteRes<[HWPort1,HWPort0156,HWPort23]> {
let Latency = 8;
@@ -1658,14 +1663,6 @@ def HWWriteResGroup91_5 : SchedWriteRes<
}
def: InstRW<[HWWriteResGroup91_5], (instregex "MMX_PSADBWirm")>;
-def HWWriteResGroup92_2 : SchedWriteRes<[HWPort01,HWPort23]> {
- let Latency = 10;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup92_2], (instregex "(V?)MULSDrm",
- "(V?)MULSSrm")>;
-
def HWWriteResGroup93 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 5;
let NumMicroOps = 3;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Mon May 7 13:52:53 2018
@@ -139,13 +139,28 @@ def : WriteRes<WriteFLoad, [SBPo
def : WriteRes<WriteFMove, [SBPort5]>;
defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
-defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
-defm : SBWriteResPair<WriteFAddY, [SBPort1], 3, [1], 1, 7>;
-defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>;
-defm : SBWriteResPair<WriteFCmpY, [SBPort1], 3, [1], 1, 7>;
-defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
-defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>;
-defm : SBWriteResPair<WriteFMulY, [SBPort0], 5, [1], 1, 7>;
+defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
+defm : SBWriteResPair<WriteFAddX, [SBPort1], 3, [1], 1, 6>;
+defm : SBWriteResPair<WriteFAddY, [SBPort1], 3, [1], 1, 7>;
+defm : SBWriteResPair<WriteFAdd64, [SBPort1], 3, [1], 1, 6>;
+defm : SBWriteResPair<WriteFAdd64X, [SBPort1], 3, [1], 1, 6>;
+defm : SBWriteResPair<WriteFAdd64Y, [SBPort1], 3, [1], 1, 7>;
+
+defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>;
+defm : SBWriteResPair<WriteFCmpX, [SBPort1], 3, [1], 1, 6>;
+defm : SBWriteResPair<WriteFCmpY, [SBPort1], 3, [1], 1, 7>;
+defm : SBWriteResPair<WriteFCmp64, [SBPort1], 3, [1], 1, 6>;
+defm : SBWriteResPair<WriteFCmp64X, [SBPort1], 3, [1], 1, 6>;
+defm : SBWriteResPair<WriteFCmp64Y, [SBPort1], 3, [1], 1, 7>;
+
+defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
+
+defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>;
+defm : SBWriteResPair<WriteFMulX, [SBPort0], 5, [1], 1, 6>;
+defm : SBWriteResPair<WriteFMulY, [SBPort0], 5, [1], 1, 7>;
+defm : SBWriteResPair<WriteFMul64, [SBPort0], 5, [1], 1, 6>;
+defm : SBWriteResPair<WriteFMul64X, [SBPort0], 5, [1], 1, 6>;
+defm : SBWriteResPair<WriteFMul64Y, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WriteFDiv, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFDivX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Mon May 7 13:52:53 2018
@@ -152,13 +152,28 @@ def : WriteRes<WriteFStore, [SKL
def : WriteRes<WriteFMove, [SKLPort015]>;
defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
-defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 6>; // Floating point add/sub.
-defm : SKLWriteResPair<WriteFAddY, [SKLPort01], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
-defm : SKLWriteResPair<WriteFCmp, [SKLPort01], 4, [1], 1, 6>; // Floating point compare.
-defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM).
-defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
-defm : SKLWriteResPair<WriteFMul, [SKLPort01], 4, [1], 1, 6>; // Floating point multiplication.
-defm : SKLWriteResPair<WriteFMulY, [SKLPort01], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
+defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 5>; // Floating point add/sub.
+defm : SKLWriteResPair<WriteFAddX, [SKLPort01], 4, [1], 1, 6>; // Floating point add/sub (XMM).
+defm : SKLWriteResPair<WriteFAddY, [SKLPort01], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
+defm : SKLWriteResPair<WriteFAdd64, [SKLPort01], 4, [1], 1, 5>; // Floating point double add/sub.
+defm : SKLWriteResPair<WriteFAdd64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double add/sub (XMM).
+defm : SKLWriteResPair<WriteFAdd64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double add/sub (YMM/ZMM).
+
+defm : SKLWriteResPair<WriteFCmp, [SKLPort01], 4, [1], 1, 5>; // Floating point compare.
+defm : SKLWriteResPair<WriteFCmpX, [SKLPort01], 4, [1], 1, 6>; // Floating point compare (XMM).
+defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM).
+defm : SKLWriteResPair<WriteFCmp64, [SKLPort01], 4, [1], 1, 5>; // Floating point double compare.
+defm : SKLWriteResPair<WriteFCmp64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double compare (XMM).
+defm : SKLWriteResPair<WriteFCmp64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double compare (YMM/ZMM).
+
+defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
+
+defm : SKLWriteResPair<WriteFMul, [SKLPort01], 4, [1], 1, 5>; // Floating point multiplication.
+defm : SKLWriteResPair<WriteFMulX, [SKLPort01], 4, [1], 1, 6>; // Floating point multiplication (XMM).
+defm : SKLWriteResPair<WriteFMulY, [SKLPort01], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
+defm : SKLWriteResPair<WriteFMul64, [SKLPort01], 4, [1], 1, 5>; // Floating point double multiplication.
+defm : SKLWriteResPair<WriteFMul64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double multiplication (XMM).
+defm : SKLWriteResPair<WriteFMul64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double multiplication (YMM/ZMM).
defm : SKLWriteResPair<WriteFDiv, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 5>; // Floating point division.
//defm : SKLWriteResPair<WriteFDivX, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>; // Floating point division (XMM).
@@ -1473,24 +1488,6 @@ def: InstRW<[SKLWriteResGroup121], (inst
"VPMOVSXWDYrm",
"VPMOVZXWDYrm")>;
-def SKLWriteResGroup122 : SchedWriteRes<[SKLPort01,SKLPort23]> {
- let Latency = 9;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup122], (instregex "(V?)ADDSDrm",
- "(V?)ADDSSrm",
- "(V?)CMPSDrm",
- "(V?)CMPSSrm",
- "(V?)MAX(C?)SDrm",
- "(V?)MAX(C?)SSrm",
- "(V?)MIN(C?)SDrm",
- "(V?)MIN(C?)SSrm",
- "(V?)MULSDrm",
- "(V?)MULSSrm",
- "(V?)SUBSDrm",
- "(V?)SUBSSrm")>;
-
def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let Latency = 9;
let NumMicroOps = 2;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Mon May 7 13:52:53 2018
@@ -152,13 +152,28 @@ def : WriteRes<WriteFStore, [SKX
def : WriteRes<WriteFMove, [SKXPort015]>;
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
-defm : SKXWriteResPair<WriteFAdd, [SKXPort015], 4, [1], 1, 6>; // Floating point add/sub.
-defm : SKXWriteResPair<WriteFAddY,[SKXPort015], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
-defm : SKXWriteResPair<WriteFCmp, [SKXPort015], 4, [1], 1, 6>; // Floating point compare.
-defm : SKXWriteResPair<WriteFCmpY,[SKXPort015], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM).
-defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
-defm : SKXWriteResPair<WriteFMul, [SKXPort015], 4, [1], 1, 6>; // Floating point multiplication.
-defm : SKXWriteResPair<WriteFMulY,[SKXPort015], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
+defm : SKXWriteResPair<WriteFAdd, [SKXPort015], 4, [1], 1, 5>; // Floating point add/sub.
+defm : SKXWriteResPair<WriteFAddX, [SKXPort015], 4, [1], 1, 6>; // Floating point add/sub (XMM).
+defm : SKXWriteResPair<WriteFAddY, [SKXPort015], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
+defm : SKXWriteResPair<WriteFAdd64, [SKXPort015], 4, [1], 1, 5>; // Floating point double add/sub.
+defm : SKXWriteResPair<WriteFAdd64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double add/sub (XMM).
+defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double add/sub (YMM/ZMM).
+
+defm : SKXWriteResPair<WriteFCmp, [SKXPort015], 4, [1], 1, 5>; // Floating point compare.
+defm : SKXWriteResPair<WriteFCmpX, [SKXPort015], 4, [1], 1, 6>; // Floating point compare (XMM).
+defm : SKXWriteResPair<WriteFCmpY, [SKXPort015], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM).
+defm : SKXWriteResPair<WriteFCmp64, [SKXPort015], 4, [1], 1, 5>; // Floating point double compare.
+defm : SKXWriteResPair<WriteFCmp64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double compare (XMM).
+defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double compare (YMM/ZMM).
+
+defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
+
+defm : SKXWriteResPair<WriteFMul, [SKXPort015], 4, [1], 1, 5>; // Floating point multiplication.
+defm : SKXWriteResPair<WriteFMulX, [SKXPort015], 4, [1], 1, 6>; // Floating point multiplication (XMM).
+defm : SKXWriteResPair<WriteFMulY, [SKXPort015], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
+defm : SKXWriteResPair<WriteFMul64, [SKXPort015], 4, [1], 1, 5>; // Floating point double multiplication.
+defm : SKXWriteResPair<WriteFMul64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double multiplication (XMM).
+defm : SKXWriteResPair<WriteFMul64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double multiplication (YMM/ZMM).
defm : SKXWriteResPair<WriteFDiv, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
//defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles. // Floating point division (XMM).
@@ -2472,20 +2487,8 @@ def SKXWriteResGroup137 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVTPS2PIirm",
"MMX_CVTTPS2PIirm",
- "(V?)ADDSDrm",
- "(V?)ADDSSrm",
- "(V?)CMPSDrm",
- "(V?)CMPSSrm",
"VCVTPH2PSrm",
- "(V?)CVTPS2PDrm",
- "(V?)MAX(C?)SDrm",
- "(V?)MAX(C?)SSrm",
- "(V?)MIN(C?)SDrm",
- "(V?)MIN(C?)SSrm",
- "(V?)MULSDrm",
- "(V?)MULSSrm",
- "(V?)SUBSDrm",
- "(V?)SUBSSrm")>;
+ "(V?)CVTPS2PDrm")>;
def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> {
let Latency = 9;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Mon May 7 13:52:53 2018
@@ -105,24 +105,37 @@ def WriteZero : SchedWrite;
defm WriteJump : X86SchedWritePair;
// Floating point. This covers both scalar and vector operations.
-def WriteFLoad : SchedWrite;
-def WriteFStore : SchedWrite;
-def WriteFMove : SchedWrite;
-defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
-defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM/ZMM).
-defm WriteFCmp : X86SchedWritePair; // Floating point compare.
-defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM/ZMM).
-defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
-defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
-defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM/ZMM).
+def WriteFLoad : SchedWrite;
+def WriteFStore : SchedWrite;
+def WriteFMove : SchedWrite;
+
+defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
+defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
+defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM/ZMM).
+defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
+defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
+defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM/ZMM).
+defm WriteFCmp : X86SchedWritePair; // Floating point compare.
+defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
+defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM/ZMM).
+defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
+defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
+defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM/ZMM).
+defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
+defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
+defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
+defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM/ZMM).
+defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
+defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
+defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM/ZMM).
defm WriteFDiv : X86SchedWritePair; // Floating point division.
defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM).
-defm WriteFDiv64 : X86SchedWritePair; // Floating point division.
-defm WriteFDiv64X : X86SchedWritePair; // Floating point division (XMM).
-defm WriteFDiv64Y : X86SchedWritePair; // Floating point division (YMM).
-defm WriteFDiv64Z : X86SchedWritePair; // Floating point division (ZMM).
+defm WriteFDiv64 : X86SchedWritePair; // Floating point double division.
+defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM).
+defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM).
+defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM).
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
@@ -267,15 +280,19 @@ def WriteNop : SchedWrite;
// Vector width wrappers.
def SchedWriteFAdd
- : X86SchedWriteWidths<WriteFAdd, WriteFAdd, WriteFAddY, WriteFAddY>;
+ : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddY>;
+def SchedWriteFAdd64
+ : X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Y>;
def SchedWriteFHAdd
: X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddY>;
def SchedWriteFCmp
- : X86SchedWriteWidths<WriteFCmp, WriteFCmp, WriteFCmpY, WriteFCmpY>;
+ : X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpY>;
+def SchedWriteFCmp64
+ : X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Y>;
def SchedWriteFMul
- : X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMulY, WriteFMulY>;
+ : X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulY>;
def SchedWriteFMul64
- : X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMulY, WriteFMulY>;
+ : X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Y>;
def SchedWriteFMA
: X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAY>;
def SchedWriteDPPD
@@ -356,9 +373,9 @@ def SchedWriteVarBlend
// Vector size wrappers.
def SchedWriteFAddSizes
- : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd>;
+ : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd64>;
def SchedWriteFCmpSizes
- : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp>;
+ : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp64>;
def SchedWriteFMulSizes
: X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
def SchedWriteFDivSizes
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Mon May 7 13:52:53 2018
@@ -204,12 +204,24 @@ def : WriteRes<WriteFMove, [AtomPort01
defm : X86WriteRes<WriteEMMS,[AtomPort01], 5, [5], 1>;
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFAddX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFAdd64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFAdd64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteFAdd64Y, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFCmpX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmpY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFCmp64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteFCmp64Y, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
+defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMulY, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
+defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFMul64X, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
+defm : AtomWriteResPair<WriteFMul64Y, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFRcpY, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
@@ -398,8 +410,7 @@ def AtomWrite0_5 : SchedWriteRes<[AtomPo
let Latency = 5;
let ResourceCycles = [5];
}
-def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)",
- "MUL(PS|SD)(rr|rm)(_Int)?")>;
+def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>;
// Port1
def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
@@ -518,13 +529,10 @@ def : InstRW<[AtomWrite01_6], (instrs LD
SHLD16rri8, SHRD16rri8,
SHLD16mrCL, SHRD16mrCL,
SHLD16mri8, SHRD16mri8,
- ADDSUBPDrr, ADDSUBPDrm,
CVTPS2DQrr, CVTTPS2DQrr)>;
def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
"IST_F(P)?(16|32|64)?m",
- "MMX_PH(ADD|SUB)S?Wrm",
- "(ADD|SUB|MAX|MIN)PDrr",
- "CMPPDrri")>;
+ "MMX_PH(ADD|SUB)S?Wrm")>;
def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
let Latency = 7;
@@ -541,8 +549,6 @@ def : InstRW<[AtomWrite01_7], (instrs AA
MMX_CVTPD2PIirr,
MMX_CVTPI2PDirr,
MMX_CVTTPD2PIirr)>;
-def : InstRW<[AtomWrite01_7], (instregex "(ADD|SUB|MAX|MIN)PDrm",
- "CMPPDrmi")>;
def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> {
let Latency = 8;
@@ -571,8 +577,7 @@ def : InstRW<[AtomWrite01_9], (instrs BT
SHLD64mrCL, SHRD64mrCL,
SHLD64mri8, SHRD64mri8,
SHLD64rri8, SHRD64rri8,
- CMPXCHG8rr,
- MULPDrr)>;
+ CMPXCHG8rr)>;
def : InstRW<[AtomWrite01_9], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F",
"(U)?COM_FI", "TST_F",
"(U)?COMIS(D|S)rr",
@@ -582,8 +587,7 @@ def AtomWrite01_10 : SchedWriteRes<[Atom
let Latency = 10;
let ResourceCycles = [10];
}
-def : InstRW<[AtomWrite01_10], (instrs FLDL2E, FLDL2T, FLDLG2, FLDLN2, FLDPI,
- MULPDrm)>;
+def : InstRW<[AtomWrite01_10], (instrs FLDL2E, FLDL2T, FLDLG2, FLDLN2, FLDPI)>;
def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
"CVT(T)?SS2SI64rm(_Int)?")>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Mon May 7 13:52:53 2018
@@ -317,12 +317,24 @@ def : WriteRes<WriteFMove,
def : WriteRes<WriteEMMS, [JFPU01, JFPX]> { let Latency = 2; }
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
+defm : JWriteResFpuPair<WriteFAddX, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>;
+defm : JWriteResFpuPair<WriteFAdd64, [JFPU0, JFPA], 3>;
+defm : JWriteResFpuPair<WriteFAdd64X, [JFPU0, JFPA], 3>;
+defm : JWriteResYMMPair<WriteFAdd64Y, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>;
+defm : JWriteResFpuPair<WriteFCmpX, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmpY, [JFPU0, JFPA], 2, [2,2], 2>;
+defm : JWriteResFpuPair<WriteFCmp64, [JFPU0, JFPA], 2>;
+defm : JWriteResFpuPair<WriteFCmp64X, [JFPU0, JFPA], 2>;
+defm : JWriteResYMMPair<WriteFCmp64Y, [JFPU0, JFPA], 2, [2,2], 2>;
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
+defm : JWriteResFpuPair<WriteFMulX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>;
+defm : JWriteResFpuPair<WriteFMul64, [JFPU1, JFPM], 4, [1,2]>;
+defm : JWriteResFpuPair<WriteFMul64X, [JFPU1, JFPM], 4, [1,2]>;
+defm : JWriteResYMMPair<WriteFMul64Y, [JFPU1, JFPM], 4, [2,4], 2>;
defm : JWriteResFpuPair<WriteFMA, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
defm : JWriteResFpuPair<WriteFMAX, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
defm : JWriteResFpuPair<WriteFMAY, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
@@ -552,32 +564,6 @@ def : InstRW<[JWriteCVTPH2PSYLd], (instr
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
-def JWriteVMULYPD: SchedWriteRes<[JFPU1, JFPM]> {
- let Latency = 4;
- let ResourceCycles = [2, 4];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteVMULYPD], (instrs VMULPDYrr)>;
-
-def JWriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
- let Latency = 9;
- let ResourceCycles = [2, 2, 4];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteVMULYPDLd, ReadAfterLd], (instrs VMULPDYrm)>;
-
-def JWriteVMULPD: SchedWriteRes<[JFPU1, JFPM]> {
- let Latency = 4;
- let ResourceCycles = [1, 2];
-}
-def : InstRW<[JWriteVMULPD], (instrs MULPDrr, MULSDrr, VMULPDrr, VMULSDrr)>;
-
-def JWriteVMULPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
- let Latency = 9;
- let ResourceCycles = [1, 1, 2];
-}
-def : InstRW<[JWriteVMULPDLd], (instrs MULPDrm, MULSDrm, VMULPDrm, VMULSDrm)>;
-
def JWriteVCVTY: SchedWriteRes<[JFPU1, JSTC]> {
let Latency = 3;
let ResourceCycles = [2, 2];
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Mon May 7 13:52:53 2018
@@ -130,13 +130,25 @@ def : WriteRes<WriteFLoad, [SLM_
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
-defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
-defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
+defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFCmp64, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFCmp64X, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFCmp64Y, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
+defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
+defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
+defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
+defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
+defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Mon May 7 13:52:53 2018
@@ -193,9 +193,17 @@ def : WriteRes<WriteFMove,
def : WriteRes<WriteFLoad, [ZnAGU]> { let Latency = 8; }
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFAdd64, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFCmpX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmpY, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFCmp64, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>;
@@ -224,7 +232,11 @@ defm : ZnWriteResFpuPair<WriteFShuffleY,
defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU01], 3, [1], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFMulX, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMulY, [ZnFPU01], 4, [1], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFMul64, [ZnFPU01], 3, [1], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFMul64X, [ZnFPU01], 3, [1], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFMul64Y, [ZnFPU01], 4, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFMAX, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>;
Modified: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll Mon May 7 13:52:53 2018
@@ -29,7 +29,7 @@ define <2 x double> @test_addsubpd(<2 x
; ATOM-LABEL: test_addsubpd:
; ATOM: # %bb.0:
; ATOM-NEXT: addsubpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: addsubpd (%rdi), %xmm0 # sched: [6:3.00]
+; ATOM-NEXT: addsubpd (%rdi), %xmm0 # sched: [7:3.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_addsubpd:
Modified: llvm/trunk/test/tools/llvm-mca/X86/Atom/resources-sse2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Atom/resources-sse2.s?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Atom/resources-sse2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Atom/resources-sse2.s Mon May 7 13:52:53 2018
@@ -407,7 +407,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 addsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * addsd (%rax), %xmm2
# CHECK-NEXT: 1 6 3.00 addsubpd %xmm0, %xmm2
-# CHECK-NEXT: 1 6 3.00 * addsubpd (%rax), %xmm2
+# CHECK-NEXT: 1 7 3.50 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 andnpd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * andnpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 andpd %xmm0, %xmm2
@@ -673,7 +673,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
-# CHECK-NEXT: 872.50 644.50
+# CHECK-NEXT: 873.00 645.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
@@ -682,7 +682,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 5.00 - addsd %xmm0, %xmm2
# CHECK-NEXT: 5.00 - addsd (%rax), %xmm2
# CHECK-NEXT: 3.00 3.00 addsubpd %xmm0, %xmm2
-# CHECK-NEXT: 3.00 3.00 addsubpd (%rax), %xmm2
+# CHECK-NEXT: 3.50 3.50 addsubpd (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 andnpd %xmm0, %xmm2
# CHECK-NEXT: 1.00 - andnpd (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 andpd %xmm0, %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/Atom/resources-sse3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Atom/resources-sse3.s?rev=331672&r1=331671&r2=331672&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Atom/resources-sse3.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Atom/resources-sse3.s Mon May 7 13:52:53 2018
@@ -40,7 +40,7 @@ movsldup (%rax), %xmm2
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 6 3.00 addsubpd %xmm0, %xmm2
-# CHECK-NEXT: 1 6 3.00 * addsubpd (%rax), %xmm2
+# CHECK-NEXT: 1 7 3.50 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 addsubps %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * addsubps (%rax), %xmm2
# CHECK-NEXT: 1 8 4.00 haddpd %xmm0, %xmm2
@@ -65,12 +65,12 @@ movsldup (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
-# CHECK-NEXT: 57.50 41.50
+# CHECK-NEXT: 58.00 42.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
# CHECK-NEXT: 3.00 3.00 addsubpd %xmm0, %xmm2
-# CHECK-NEXT: 3.00 3.00 addsubpd (%rax), %xmm2
+# CHECK-NEXT: 3.50 3.50 addsubpd (%rax), %xmm2
# CHECK-NEXT: 5.00 - addsubps %xmm0, %xmm2
# CHECK-NEXT: 5.00 - addsubps (%rax), %xmm2
# CHECK-NEXT: 4.00 4.00 haddpd %xmm0, %xmm2
More information about the llvm-commits mailing list