[llvm] r332376 - [X86] Split WriteCvtF2F into F32->F64 and F64->F32 scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue May 15 10:36:49 PDT 2018
Author: rksimon
Date: Tue May 15 10:36:49 2018
New Revision: 332376
URL: http://llvm.org/viewvc/llvm-project?rev=332376&view=rev
Log:
[X86] Split WriteCvtF2F into F32->F64 and F64->F32 scheduler classes
BtVer2 - Fixes schedules for (V)CVTPS2PD instructions
A lot of the Intel models still have too many InstRW overrides for these new classes; this needs cleaning up, but I wanted to get the classes in first.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue May 15 10:36:49 2018
@@ -7183,10 +7183,10 @@ multiclass avx512_cvt_fp_scalar_ss2sd<bi
}
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
- X86froundRnd, WriteCvtF2F, f64x_info,
+ X86froundRnd, WriteCvtSD2SS, f64x_info,
f32x_info>, NotMemoryFoldable;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
- X86fpextRnd, WriteCvtF2F, f32x_info,
+ X86fpextRnd, WriteCvtSS2SD, f32x_info,
f64x_info>, NotMemoryFoldable;
def : Pat<(f64 (fpextend FR32X:$src)),
@@ -7277,33 +7277,33 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc
// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
- X86FoldableSchedWrite sched> {
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
- fpextend, sched>,
+ fpextend, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
- X86vfpextRnd, sched>, EVEX_V512;
+ X86vfpextRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
- X86vfpext, sched, "{1to2}", "", f64mem>, EVEX_V128;
+ X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
- sched>, EVEX_V256;
+ sched.YMM>, EVEX_V256;
}
}
// Truncate Double to Float
-multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched> {
+multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched>,
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
- X86vfproundRnd, sched>, EVEX_V512;
+ X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
- X86vfpround, sched, "{1to2}", "{x}">, EVEX_V128;
+ X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
- sched, "{1to4}", "{y}">, EVEX_V256;
+ sched.YMM, "{1to4}", "{y}">, EVEX_V256;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
@@ -7316,9 +7316,9 @@ multiclass avx512_cvtpd2ps<bits<8> opc,
}
}
-defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", WriteCvtF2F>,
+defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
VEX_W, PD, EVEX_CD8<64, CD8VF>;
-defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", WriteCvtF2F>,
+defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
PS, EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
@@ -7864,7 +7864,7 @@ multiclass avx512_cvtph2ps_sae<X86Vector
let Predicates = [HasAVX512] in
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
WriteCvtPH2PSY>,
- avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtF2F>,
+ avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSY>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue May 15 10:36:49 2018
@@ -1241,16 +1241,16 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $ds
// Convert scalar double to scalar single
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
- (ins FR32:$src1, FR64:$src2),
- "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V, VEX_LIG,
- Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable;
+ (ins FR32:$src1, FR64:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
+ Sched<[WriteCvtSD2SS]>;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f64mem:$src2),
- "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XD, VEX_4V, VEX_LIG,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable;
+ (ins FR32:$src1, f64mem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
+ Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
}
def : Pat<(f32 (fpround FR64:$src)),
@@ -1260,12 +1260,12 @@ def : Pat<(f32 (fpround FR64:$src)),
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fpround FR64:$src))]>,
- Sched<[WriteCvtF2F]>;
+ Sched<[WriteCvtSD2SS]>;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
- "cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
- XD,
- Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
+ XD, Requires<[UseSSE2, OptForSize]>,
+ Sched<[WriteCvtSD2SS.Folded]>;
let isCodeGenOnly = 1 in {
def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
@@ -1273,30 +1273,29 @@ def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
- XD, VEX_4V, VEX_WIG,
- Requires<[HasAVX]>, Sched<[WriteCvtF2F]>;
+ XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
+ Sched<[WriteCvtSD2SS]>;
def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
- XD, VEX_4V, VEX_WIG,
- Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
-
+ XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
+ Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
- XD, Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
+ XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, Requires<[UseSSE2]>,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
}
} // isCodeGenOnly = 1
@@ -1306,14 +1305,14 @@ let hasSideEffects = 0, Predicates = [Us
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XS, VEX_4V, VEX_LIG,
- Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable;
+ XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
+ Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XS, VEX_4V, VEX_LIG,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable;
+ XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
+ Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
}
def : Pat<(f64 (fpextend FR32:$src)),
@@ -1331,11 +1330,12 @@ def : Pat<(extloadf32 addr:$src),
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fpextend FR32:$src))]>,
- XS, Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
+ XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))]>,
- XS, Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
+ XS, Requires<[UseSSE2, OptForSize]>,
+ Sched<[WriteCvtSS2SD.Folded]>;
// extload f32 -> f64. This matches load+fpextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
@@ -1352,25 +1352,25 @@ def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, VEX_WIG,
- Requires<[HasAVX]>, Sched<[WriteCvtF2F]>;
+ Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, VEX_WIG,
- Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ []>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
+ Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[]>, XS, Requires<[UseSSE2]>,
- Sched<[WriteCvtF2F]>;
+ Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[]>, XS, Requires<[UseSSE2]>,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
}
} // isCodeGenOnly = 1
@@ -1699,30 +1699,30 @@ let Predicates = [HasAVX, NoVLX] in {
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
- PS, VEX, Sched<[WriteCvtF2F]>, VEX_WIG;
+ PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
- PS, VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG;
+ PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))]>,
- PS, VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG;
+ PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
- PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG;
+ PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG;
}
let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
- PS, Sched<[WriteCvtF2F]>;
+ PS, Sched<[WriteCvtPS2PD]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
- PS, Sched<[WriteCvtF2FLd]>;
+ PS, Sched<[WriteCvtPS2PD.Folded]>;
}
// Convert Packed DW Integers to Packed Double FP
@@ -1787,7 +1787,7 @@ let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
- VEX, Sched<[WriteCvtF2F]>, VEX_WIG;
+ VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
@@ -1796,7 +1796,7 @@ let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>,
- VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG;
+ VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
(VCVTPD2PSrm VR128:$dst, f128mem:$src), 0, "intel">;
@@ -1805,11 +1805,11 @@ let Predicates = [HasAVX, NoVLX] in {
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (fpround VR256:$src))]>,
- VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (fpround (loadv4f64 addr:$src)))]>,
- VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
}
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
@@ -1819,11 +1819,11 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
- Sched<[WriteCvtF2F]>;
+ Sched<[WriteCvtPD2PS]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
- Sched<[WriteCvtF2FLd]>;
+ Sched<[WriteCvtPD2PS.Folded]>;
// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Tue May 15 10:36:49 2018
@@ -343,9 +343,16 @@ def : WriteRes<WriteVecExtractSt, [BWPor
}
// Conversion between integer and float.
-defm : BWWriteResPair<WriteCvtF2I, [BWPort1], 3>; // Float -> Integer.
-defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
-defm : BWWriteResPair<WriteCvtF2F, [BWPort1], 3>; // Float -> Float size conversion.
+defm : BWWriteResPair<WriteCvtF2I, [BWPort1], 3>; // Float -> Integer.
+defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
+
+defm : BWWriteResPair<WriteCvtSS2SD, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPS2PD, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>;
+
+defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1], 3>;
defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Tue May 15 10:36:49 2018
@@ -222,9 +222,6 @@ defm : HWWriteResPair<WriteFSqrt64Y, [HW
defm : HWWriteResPair<WriteFSqrt64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>;
defm : HWWriteResPair<WriteFSqrt80, [HWPort0,HWFPDivider], 23, [1,17]>;
-defm : HWWriteResPair<WriteCvtF2I, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
-defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
@@ -251,6 +248,18 @@ defm : HWWriteResPair<WriteFVarShuffle25
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
+// Conversion between integer and float.
+defm : HWWriteResPair<WriteCvtF2I, [HWPort1], 3>;
+defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
+
+defm : HWWriteResPair<WriteCvtSS2SD, [HWPort1], 3>;
+defm : HWWriteResPair<WriteCvtPS2PD, [HWPort1], 3>;
+defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort1], 3>;
+
+defm : HWWriteResPair<WriteCvtSD2SS, [HWPort1], 3>;
+defm : HWWriteResPair<WriteCvtPD2PS, [HWPort1], 3>;
+defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1], 3>;
+
defm : X86WriteRes<WriteCvtPH2PS, [HWPort0,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [HWPort0,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort0,HWPort23], 6, [1,1], 2>;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Tue May 15 10:36:49 2018
@@ -217,9 +217,6 @@ defm : SBWriteResPair<WriteFSqrt80, [SB
defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>;
defm : SBWriteResPair<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>;
defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>;
-defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
-defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
-defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
@@ -236,8 +233,20 @@ defm : SBWriteResPair<WriteFBlendY, [S
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
-defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
-defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
+// Conversion between integer and float.
+defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
+defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
+
+defm : SBWriteResPair<WriteCvtSS2SD, [SBPort0], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteCvtPS2PD, [SBPort0,SBPort5], 2, [1,1], 2>;
+defm : SBWriteResPair<WriteCvtPS2PDY, [SBPort0,SBPort5], 2, [1,1], 2>;
+
+defm : SBWriteResPair<WriteCvtSD2SS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
+defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
+defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
+
+defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
+defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>;
@@ -450,13 +459,6 @@ defm : SBWriteResPair<WriteFMAY, [SBPort
// Remaining SNB instrs.
-def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup0], (instregex "(V?)CVTSS2SDrr")>;
-
def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
let Latency = 1;
let NumMicroOps = 1;
@@ -555,13 +557,6 @@ def: InstRW<[SBWriteResGroup12], (instre
"(V?)UCOMISDrr",
"(V?)UCOMISSrr")>;
-def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort5]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup13], (instregex "(V?)CVTPS2PD(Y?)rr")>;
-
def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -699,8 +694,6 @@ def SBWriteResGroup28 : SchedWriteRes<[S
def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPI2PDirr",
"MMX_CVT(T?)PD2PIirr",
"(V?)CVTDQ2PD(Y?)rr",
- "(V?)CVTPD2PS(Y?)rr",
- "(V?)CVTSD2SSrr",
"(V?)CVTSI(64)?2SDrr",
"(V?)CVT(T?)PD2DQ(Y?)rr")>;
@@ -903,8 +896,7 @@ def SBWriteResGroup55 : SchedWriteRes<[S
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup55], (instregex "(V?)CVTPS2PD(Y?)rm",
- "(V?)CVTSS2SDrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "(V?)CVTPS2PD(Y?)rm")>;
def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 7;
@@ -1180,8 +1172,6 @@ def SBWriteResGroup103 : SchedWriteRes<[
def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPI2PDirm",
"MMX_CVT(T?)PD2PIirm",
"(V?)CVTDQ2PD(Y?)rm",
- "(V?)CVTPD2PSrm",
- "(V?)CVTSD2SSrm",
"(V?)CVTSI(64)?2SSrm",
"(V?)CVT(T?)PD2DQrm")>;
@@ -1212,8 +1202,7 @@ def SBWriteResGroup107 : SchedWriteRes<[
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2PSYrm",
- "VCVT(T?)PD2DQYrm")>;
+def: InstRW<[SBWriteResGroup107], (instregex "VCVT(T?)PD2DQYrm")>;
def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 12;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Tue May 15 10:36:49 2018
@@ -338,7 +338,14 @@ def : WriteRes<WriteVecExtractSt, [SKLPo
// Conversion between integer and float.
defm : SKLWriteResPair<WriteCvtF2I, [SKLPort1], 3>; // Float -> Integer.
defm : SKLWriteResPair<WriteCvtI2F, [SKLPort1], 4>; // Integer -> Float.
-defm : SKLWriteResPair<WriteCvtF2F, [SKLPort1], 3>; // Float -> Float size conversion.
+
+defm : SKLWriteResPair<WriteCvtSS2SD, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteCvtPS2PD, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteCvtPS2PDY, [SKLPort1], 3>;
+
+defm : SKLWriteResPair<WriteCvtSD2SS, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteCvtPD2PS, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteCvtPD2PSY, [SKLPort1], 3>;
defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort015], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Tue May 15 10:36:49 2018
@@ -336,9 +336,16 @@ def : WriteRes<WriteVecExtractSt, [SKXPo
}
// Conversion between integer and float.
-defm : SKXWriteResPair<WriteCvtF2I, [SKXPort1], 3>; // Float -> Integer.
-defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>; // Integer -> Float.
-defm : SKXWriteResPair<WriteCvtF2F, [SKXPort1], 3>; // Float -> Float size conversion.
+defm : SKXWriteResPair<WriteCvtF2I, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>;
+
+defm : SKXWriteResPair<WriteCvtSS2SD, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteCvtPS2PD, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort1], 3>;
+
+defm : SKXWriteResPair<WriteCvtSD2SS, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteCvtPD2PS, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort1], 3>;
defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort015], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort015], 7, [1,1], 2>;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Tue May 15 10:36:49 2018
@@ -298,7 +298,14 @@ def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
-defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
+
+defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
+defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
+defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM/ZMM).
+
+defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
+defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
+defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM/ZMM).
defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM/ZMM).
@@ -451,6 +458,13 @@ def SchedWriteFVarBlend
: X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
WriteFVarBlendY, WriteFVarBlendY>;
+def SchedWriteCvtPS2PD
+ : X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
+ WriteCvtPS2PDY, WriteCvtPS2PDY>;
+def SchedWriteCvtPD2PS
+ : X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
+ WriteCvtPD2PSY, WriteCvtPD2PSY>;
+
def SchedWriteVecALU
: X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUY>;
def SchedWritePHAdd
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Tue May 15 10:36:49 2018
@@ -273,9 +273,16 @@ defm : AtomWriteResPair<WriteFVarShuffle
// Conversions.
////////////////////////////////////////////////////////////////////////////////
-defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
-defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
-defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
+defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
+defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+
+defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WriteCvtPS2PDY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+
+defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WriteCvtPD2PSY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtPH2PS, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteCvtPH2PSY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
@@ -571,9 +578,7 @@ def AtomWrite01_7 : SchedWriteRes<[AtomP
def : InstRW<[AtomWrite01_7], (instrs AAD8i8,
CVTDQ2PDrr,
CVTPD2DQrr,
- CVTPD2PSrr,
CVTPS2DQrm,
- CVTPS2PDrr,
CVTTPD2DQrr,
CVTTPS2DQrm,
MMX_CVTPD2PIirr,
@@ -590,8 +595,6 @@ def : InstRW<[AtomWrite01_8], (instrs LO
FNSTCW16m,
CVTDQ2PDrm,
CVTPD2DQrm,
- CVTPD2PSrm,
- CVTPS2PDrm,
CVTTPD2DQrm,
MMX_CVTPD2PIirm,
MMX_CVTPI2PDirm,
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Tue May 15 10:36:49 2018
@@ -360,9 +360,16 @@ defm : JWriteResFpuPair<WriteFVarShuffle
// Conversions.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResFpuPair<WriteCvtF2I, [JFPU1, JSTC], 3>; // Float -> Integer.
-defm : JWriteResFpuPair<WriteCvtI2F, [JFPU1, JSTC], 3>; // Integer -> Float.
-defm : JWriteResFpuPair<WriteCvtF2F, [JFPU1, JSTC], 3>; // Float -> Float size conversion.
+defm : JWriteResFpuPair<WriteCvtF2I, [JFPU1, JSTC], 3>;
+defm : JWriteResFpuPair<WriteCvtI2F, [JFPU1, JSTC], 3>;
+
+defm : JWriteResFpuPair<WriteCvtSS2SD, [JFPU1, JSTC], 7, [1,2], 2>;
+defm : JWriteResFpuPair<WriteCvtPS2PD, [JFPU1, JSTC], 2, [1,1], 1>;
+defm : JWriteResYMMPair<WriteCvtPS2PDY, [JFPU1, JSTC], 2, [2,2], 2>;
+
+defm : JWriteResFpuPair<WriteCvtSD2SS, [JFPU1, JSTC], 7, [1,2], 2>;
+defm : JWriteResFpuPair<WriteCvtPD2PS, [JFPU1, JSTC], 3, [1,1], 1>;
+defm : JWriteResYMMPair<WriteCvtPD2PSY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
@@ -372,20 +379,6 @@ defm : X86WriteRes<WriteCvtPS2PHY,
defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
-def JWriteCVTF2F : SchedWriteRes<[JFPU1, JSTC]> {
- let Latency = 7;
- let ResourceCycles = [1, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteCVTF2F], (instregex "(V)?CVTS(D|S)2S(D|S)rr")>;
-
-def JWriteCVTF2FLd : SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
- let Latency = 12;
- let ResourceCycles = [1, 1, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteCVTF2FLd], (instregex "(V)?CVTS(D|S)2S(D|S)rm")>;
-
def JWriteCVTF2SI : SchedWriteRes<[JFPU1, JSTC, JFPA, JALU0]> {
let Latency = 7;
let NumMicroOps = 2;
@@ -560,14 +553,14 @@ def JWriteVCVTPDY: SchedWriteRes<[JFPU1,
let ResourceCycles = [2, 2, 4];
let NumMicroOps = 3;
}
-def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr, VCVTPD2PSYrr)>;
+def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr)>;
def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> {
let Latency = 11;
let ResourceCycles = [2, 2, 2, 4];
let NumMicroOps = 3;
}
-def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
+def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm)>;
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Tue May 15 10:36:49 2018
@@ -197,9 +197,6 @@ defm : SLMWriteResPair<WriteFSqrt80, [S
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
@@ -213,6 +210,18 @@ defm : SLMWriteResPair<WriteFVarShuffle,
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
+// Conversion between integer and float.
+defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
+
+defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
+
+defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
+
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Tue May 15 10:36:49 2018
@@ -227,7 +227,6 @@ defm : ZnWriteResFpuPair<WriteFVarBlendY
defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;
defm : ZnWriteResFpuPair<WriteVarBlendY, [ZnFPU0], 1>;
defm : ZnWriteResFpuPair<WriteCvtI2F, [ZnFPU3], 5>;
-defm : ZnWriteResFpuPair<WriteCvtF2F, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>;
@@ -1186,9 +1185,16 @@ def : InstRW<[WriteMicrocoded], (instreg
def ZnWriteCVTPD2PSr: SchedWriteRes<[ZnFPU3]> {
let Latency = 4;
}
+def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> {
+ let Latency = 5;
+}
+
// CVTPD2PS.
// x,x.
-def : InstRW<[ZnWriteCVTPD2PSr], (instregex "(V?)CVTPD2PSrr")>;
+def : SchedAlias<WriteCvtPD2PS, ZnWriteCVTPD2PSr>;
+
+// y,y.
+def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>;
def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> {
let Latency = 11;
@@ -1196,28 +1202,28 @@ def ZnWriteCVTPD2PSLd: SchedWriteRes<[Zn
let ResourceCycles = [1,2];
}
// x,m128.
-def : InstRW<[ZnWriteCVTPD2PSLd], (instregex "(V?)CVTPD2PS(X?)rm")>;
+def : SchedAlias<WriteCvtPD2PSLd, ZnWriteCVTPD2PSLd>;
// x,m256.
def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let Latency = 11;
}
-def : InstRW<[ZnWriteCVTPD2PSYLd], (instregex "(V?)CVTPD2PSYrm")>;
+def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
// CVTSD2SS.
// x,x.
// Same as WriteCVTPD2PSr
-def : InstRW<[ZnWriteCVTPD2PSr], (instregex "(V)?CVTSD2SSrr")>;
+def : SchedAlias<WriteCvtSD2SS, ZnWriteCVTPD2PSr>;
// x,m64.
-def : InstRW<[ZnWriteCVTPD2PSLd], (instregex "(V)?CVTSD2SSrm")>;
+def : SchedAlias<WriteCvtSD2SSLd, ZnWriteCVTPD2PSLd>;
// CVTPS2PD.
// x,x.
def ZnWriteCVTPS2PDr : SchedWriteRes<[ZnFPU3]> {
let Latency = 3;
}
-def : InstRW<[ZnWriteCVTPS2PDr], (instregex "(V?)CVTPS2PDrr")>;
+def : SchedAlias<WriteCvtPS2PD, ZnWriteCVTPS2PDr>;
// x,m64.
// y,m128.
@@ -1225,20 +1231,21 @@ def ZnWriteCVTPS2PDLd : SchedWriteRes<[Z
let Latency = 10;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteCVTPS2PDLd], (instregex "(V?)CVTPS2PD(Y?)rm")>;
+def : SchedAlias<WriteCvtPS2PDLd, ZnWriteCVTPS2PDLd>;
+def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>;
// y,x.
def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> {
let Latency = 3;
}
-def : InstRW<[ZnWriteVCVTPS2PDY], (instregex "VCVTPS2PDYrr")>;
+def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>;
// CVTSS2SD.
// x,x.
def ZnWriteCVTSS2SDr : SchedWriteRes<[ZnFPU3]> {
let Latency = 4;
}
-def : InstRW<[ZnWriteCVTSS2SDr], (instregex "(V?)CVTSS2SDrr")>;
+def : SchedAlias<WriteCvtSS2SD, ZnWriteCVTSS2SDr>;
// x,m32.
def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
@@ -1246,7 +1253,7 @@ def ZnWriteCVTSS2SDLd : SchedWriteRes<[Z
let NumMicroOps = 2;
let ResourceCycles = [1, 2];
}
-def : InstRW<[ZnWriteCVTSS2SDLd], (instregex "(V?)CVTSS2SDrm")>;
+def : SchedAlias<WriteCvtSS2SDLd, ZnWriteCVTSS2SDLd>;
def ZnWriteCVTDQ2PDr: SchedWriteRes<[ZnFPU12,ZnFPU3]> {
let Latency = 5;
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Tue May 15 10:36:49 2018
@@ -1752,8 +1752,8 @@ define <4 x i32> @f64to4si(<4 x double>
define <16 x float> @f64to16f32(<16 x double> %b) nounwind {
; GENERIC-LABEL: f64to16f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [3:1.00]
+; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
+; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [4:1.00]
; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1788,7 +1788,7 @@ define <4 x float> @f64to4f32_mask(<4 x
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [3:1.00]
+; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1823,7 +1823,7 @@ define <4 x float> @f64tof32_inreg(<2 x
define <8 x double> @f32to8f64(<8 x float> %b) nounwind {
; GENERIC-LABEL: f32to8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to8f64:
@@ -1838,7 +1838,7 @@ define <4 x double> @f32to4f64_mask(<4 x
; GENERIC-LABEL: f32to4f64_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00]
+; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to4f64_mask:
@@ -4216,7 +4216,7 @@ define <8 x i64> @zext_8x32_to_8x64mask(
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
; GENERIC-LABEL: fptrunc_test:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fptrunc_test:
@@ -4230,7 +4230,7 @@ define <8 x float> @fptrunc_test(<8 x do
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
; GENERIC-LABEL: fpext_test:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fpext_test:
Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Tue May 15 10:36:49 2018
@@ -1818,15 +1818,15 @@ define <2 x double> @test_cvtps2pd(<4 x
;
; BTVER2-SSE-LABEL: test_cvtps2pd:
; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cvtps2pd:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
+; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s Tue May 15 10:36:49 2018
@@ -1122,10 +1122,10 @@ vzeroupper
# CHECK-NEXT: 1 8 1.00 * vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: 2 8 2.00 * vcvtps2dq (%rax), %ymm2
-# CHECK-NEXT: 1 3 1.00 vcvtps2pd %xmm0, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vcvtps2pd (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 vcvtps2pd %xmm0, %ymm2
-# CHECK-NEXT: 1 8 1.00 * vcvtps2pd (%rax), %ymm2
+# CHECK-NEXT: 1 2 1.00 vcvtps2pd %xmm0, %xmm2
+# CHECK-NEXT: 1 7 1.00 * vcvtps2pd (%rax), %xmm2
+# CHECK-NEXT: 2 2 2.00 vcvtps2pd %xmm0, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: 2 7 1.00 vcvtsd2si %xmm0, %ecx
# CHECK-NEXT: 2 7 1.00 vcvtsd2si %xmm0, %rcx
# CHECK-NEXT: 2 12 1.00 * vcvtsd2si (%rax), %ecx
@@ -1720,7 +1720,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 48.00 2.00 - 349.50 909.50 397.00 409.00 381.00 - 43.00 122.00 118.50 118.50 38.00
+# CHECK-NEXT: 48.00 2.00 - 349.50 909.50 397.00 411.00 382.00 - 43.00 124.00 118.50 118.50 38.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -1830,8 +1830,8 @@ vzeroupper
# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvtps2dq (%rax), %ymm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtps2pd (%rax), %xmm2
-# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2pd %xmm0, %ymm2
-# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtps2pd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vcvtps2pd %xmm0, %ymm2
+# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: 1.00 - - 1.00 - - 1.00 - - - 1.00 - - - vcvtsd2si %xmm0, %ecx
# CHECK-NEXT: 1.00 - - 1.00 - - 1.00 - - - 1.00 - - - vcvtsd2si %xmm0, %rcx
# CHECK-NEXT: 1.00 - - 1.00 - - 1.00 1.00 - - 1.00 - - - vcvtsd2si (%rax), %ecx
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s?rev=332376&r1=332375&r2=332376&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s Tue May 15 10:36:49 2018
@@ -431,8 +431,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 8 1.00 * cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * cvtps2dq (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 cvtps2pd %xmm0, %xmm2
-# CHECK-NEXT: 1 8 1.00 * cvtps2pd (%rax), %xmm2
+# CHECK-NEXT: 1 2 1.00 cvtps2pd %xmm0, %xmm2
+# CHECK-NEXT: 1 7 1.00 * cvtps2pd (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 cvtsd2si %xmm0, %ecx
# CHECK-NEXT: 2 7 1.00 cvtsd2si %xmm0, %rcx
# CHECK-NEXT: 2 12 1.00 * cvtsd2si (%rax), %ecx
More information about the llvm-commits mailing list