[llvm] r330179 - [X86] Add FP comparison scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 17 00:22:45 PDT 2018
Author: rksimon
Date: Tue Apr 17 00:22:44 2018
New Revision: 330179
URL: http://llvm.org/viewvc/llvm-project?rev=330179&view=rev
Log:
[X86] Add FP comparison scheduler classes
Split VCMP/VMAX/VMIN instructions off to WriteFCmp and VCOMIS instructions off to WriteFCom, instead of assuming they match WriteFAdd.
Differential Revision: https://reviews.llvm.org/D45656
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFPStack.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/x87-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Apr 17 00:22:44 2018
@@ -2051,10 +2051,10 @@ multiclass avx512_cmp_scalar<X86VectorVT
let Predicates = [HasAVX512] in {
let ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
- WriteFAdd>, AVX512XSIi8Base;
+ WriteFCmp>, AVX512XSIi8Base;
let ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
- WriteFAdd>, AVX512XDIi8Base, VEX_W;
+ WriteFCmp>, AVX512XDIi8Base, VEX_W;
}
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
@@ -2511,9 +2511,9 @@ multiclass avx512_vcmp<X86FoldableSchedW
}
}
-defm VCMPPD : avx512_vcmp<WriteFAdd, avx512vl_f64_info>,
+defm VCMPPD : avx512_vcmp<WriteFCmp, avx512vl_f64_info>,
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
-defm VCMPPS : avx512_vcmp<WriteFAdd, avx512vl_f32_info>,
+defm VCMPPS : avx512_vcmp<WriteFCmp, avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
@@ -4906,9 +4906,9 @@ defm VMUL : avx512_binop_s_round<0x59, "
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, WriteFAdd, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, WriteFDiv, 0>;
defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
- WriteFAdd, 0>;
+ WriteFCmp, 0>;
defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
- WriteFAdd, 0>;
+ WriteFCmp, 0>;
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
@@ -4932,19 +4932,19 @@ multiclass avx512_comutable_binop_s<bits
}
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
- WriteFAdd>, XS, EVEX_4V, VEX_LIG,
+ WriteFCmp>, XS, EVEX_4V, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
- WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG,
+ WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
- WriteFAdd>, XS, EVEX_4V, VEX_LIG,
+ WriteFCmp>, XS, EVEX_4V, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
- WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG,
+ WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
@@ -5050,13 +5050,13 @@ defm VSUB : avx512_fp_binop_p<0x5C, "vsu
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, WriteFDiv>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>;
-defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFAdd, 0>,
- avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFAdd>;
-defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFAdd, 0>,
- avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFAdd>;
+defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFCmp, 0>,
+ avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFCmp>;
+defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFCmp, 0>,
+ avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFCmp>;
let isCodeGenOnly = 1 in {
- defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFAdd, 1>;
- defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFAdd, 1>;
+ defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFCmp, 1>;
+ defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFCmp, 1>;
}
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, WriteFAdd, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFAdd, 0>;
@@ -7732,44 +7732,44 @@ multiclass avx512_ord_cmp_sae<bits<8> op
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
- defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFAdd>,
+ defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
- defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFAdd>,
+ defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
- defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFAdd>,
+ defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
- defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFAdd>,
+ defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
- "ucomiss", WriteFAdd>, PS, EVEX, VEX_LIG,
+ "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
- "ucomisd", WriteFAdd>, PD, EVEX,
+ "ucomisd", WriteFCom>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
let Pattern = []<dag> in {
defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
- "comiss", WriteFAdd>, PS, EVEX, VEX_LIG,
+ "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
- "comisd", WriteFAdd>, PD, EVEX,
+ "comisd", WriteFCom>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let isCodeGenOnly = 1 in {
defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", WriteFAdd>, PS, EVEX, VEX_LIG,
+ sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", WriteFAdd>, PD, EVEX,
+ sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", WriteFAdd>, PS, EVEX, VEX_LIG,
+ sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", WriteFAdd>, PD, EVEX,
+ sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}
Modified: llvm/trunk/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFPStack.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFPStack.td Tue Apr 17 00:22:44 2018
@@ -277,6 +277,8 @@ def SUB_FPrST0 : FPrST0PInst<MRM5r, "fs
def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t$op">;
def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
+} // SchedRW
+let SchedRW = [WriteFCom] in {
def COM_FST0r : FPST0rInst <MRM2r, "fcom\t$op">;
def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">;
} // SchedRW
@@ -320,7 +322,7 @@ defm SIN : FPUnary<fsin, MRM_FE, "fsin">
defm COS : FPUnary<fcos, MRM_FF, "fcos">;
}
-let SchedRW = [WriteFAdd] in {
+let SchedRW = [WriteFCom] in {
let hasSideEffects = 0 in {
def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
@@ -333,7 +335,7 @@ def TST_F : FPI<0xD9, MRM_E4, (outs), (
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
-let SchedRW = [WriteFAddLd] in {
+let SchedRW = [WriteFComLd] in {
def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
@@ -568,7 +570,7 @@ def LD_F1 : FPI<0xD9, MRM_E8, (outs), (i
}
// Floating point compares.
-let SchedRW = [WriteFAdd] in {
+let SchedRW = [WriteFCom] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
@@ -578,7 +580,7 @@ def UCOM_Fpr80 : FpI_ <(outs), (ins RFP
} // SchedRW
} // Defs = [FPSW]
-let SchedRW = [WriteFAdd] in {
+let SchedRW = [WriteFCom] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Apr 17 00:22:44 2018
@@ -1854,23 +1854,23 @@ let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFAdd>, XS, VEX_4V, VEX_LIG, VEX_WIG;
+ WriteFCmp>, XS, VEX_4V, VEX_LIG, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFAdd>, // same latency as 32 bit compare
+ WriteFCmp>, // same latency as 32 bit compare
XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
- "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFAdd>, XS;
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XS;
let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
- "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFAdd>, XD;
+ "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XD;
}
multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
@@ -1894,21 +1894,21 @@ let isCodeGenOnly = 1 in {
let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- WriteFAdd, sse_load_f32>, XS, VEX_4V;
+ WriteFCmp, sse_load_f32>, XS, VEX_4V;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- WriteFAdd, sse_load_f64>, // same latency as f32
+ WriteFCmp, sse_load_f64>, // same latency as f32
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
- WriteFAdd, sse_load_f32>, XS;
+ WriteFCmp, sse_load_f32>, XS;
let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
- WriteFAdd, sse_load_f64>, XD;
+ WriteFCmp, sse_load_f64>, XD;
}
}
@@ -1951,49 +1951,49 @@ let mayLoad = 1 in
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
- "ucomiss", WriteFAdd>, PS, VEX, VEX_LIG, VEX_WIG;
+ "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
- "ucomisd", WriteFAdd>, PD, VEX, VEX_LIG, VEX_WIG;
+ "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
let Pattern = []<dag> in {
defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
- "comiss", WriteFAdd>, PS, VEX, VEX_LIG, VEX_WIG;
+ "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
- "comisd", WriteFAdd>, PD, VEX, VEX_LIG, VEX_WIG;
+ "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
}
let isCodeGenOnly = 1 in {
defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", WriteFAdd>, PS, VEX, VEX_WIG;
+ sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_WIG;
defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", WriteFAdd>, PD, VEX, VEX_WIG;
+ sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_WIG;
defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", WriteFAdd>, PS, VEX, VEX_WIG;
+ sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_WIG;
defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", WriteFAdd>, PD, VEX, VEX_WIG;
+ sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_WIG;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
- "ucomiss", WriteFAdd>, PS;
+ "ucomiss", WriteFCom>, PS;
defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
- "ucomisd", WriteFAdd>, PD;
+ "ucomisd", WriteFCom>, PD;
let Pattern = []<dag> in {
defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
- "comiss", WriteFAdd>, PS;
+ "comiss", WriteFCom>, PS;
defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
- "comisd", WriteFAdd>, PD;
+ "comisd", WriteFCom>, PD;
}
let isCodeGenOnly = 1 in {
defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", WriteFAdd>, PS;
+ sse_load_f32, "ucomiss", WriteFCom>, PS;
defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", WriteFAdd>, PD;
+ sse_load_f64, "ucomisd", WriteFCom>, PD;
defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", WriteFAdd>, PS;
+ sse_load_f32, "comiss", WriteFCom>, PS;
defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", WriteFAdd>, PD;
+ sse_load_f64, "comisd", WriteFCom>, PD;
}
} // Defs = [EFLAGS]
@@ -2028,28 +2028,28 @@ multiclass sse12_cmp_packed<RegisterClas
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, v4f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFAdd, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
+ WriteFCmp, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, v2f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFAdd, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
+ WriteFCmp, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, v8f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFAdd, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L;
+ WriteFCmp, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, v4f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFAdd, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L;
+ WriteFCmp, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, v4f32,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- WriteFAdd, SSEPackedSingle, memopv4f32>, PS;
+ WriteFCmp, SSEPackedSingle, memopv4f32>, PS;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, v2f64,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- WriteFAdd, SSEPackedDouble, memopv2f64>, PD;
+ WriteFCmp, SSEPackedDouble, memopv2f64>, PD;
}
def CommutableCMPCC : PatLeaf<(imm), [{
@@ -2583,19 +2583,19 @@ let isCommutable = 0 in {
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, WriteFDiv>,
basic_sse12_fp_binop_s<0x5E, "div", fdiv, WriteFDiv>,
basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, WriteFDiv>;
- defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFAdd>,
- basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFAdd>,
- basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFAdd>;
- defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFAdd>,
- basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFAdd>,
- basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFAdd>;
+ defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFCmp>,
+ basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFCmp>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFCmp>;
+ defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFCmp>,
+ basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFCmp>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFCmp>;
}
let isCodeGenOnly = 1 in {
- defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFAdd>,
- basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFAdd>;
- defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFAdd>,
- basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFAdd>;
+ defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFCmp>,
+ basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFCmp>;
+ defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFCmp>,
+ basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFCmp>;
}
// Patterns used to select SSE scalar fp arithmetic instructions from
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Tue Apr 17 00:22:44 2018
@@ -154,7 +154,9 @@ def : WriteRes<WriteFLoad, [BWPo
def : WriteRes<WriteFStore, [BWPort237, BWPort4]>;
def : WriteRes<WriteFMove, [BWPort5]>;
-defm : BWWriteResPair<WriteFAdd, [BWPort1], 3>; // Floating point add/sub/compare.
+defm : BWWriteResPair<WriteFAdd, [BWPort1], 3>; // Floating point add/sub.
+defm : BWWriteResPair<WriteFCmp, [BWPort1], 3>; // Floating point compare.
+defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
defm : BWWriteResPair<WriteFMul, [BWPort0], 5>; // Floating point multiplication.
defm : BWWriteResPair<WriteFDiv, [BWPort0], 12>; // 10-14 cycles. // Floating point division.
defm : BWWriteResPair<WriteFSqrt, [BWPort0], 15>; // Floating point square root.
@@ -843,29 +845,13 @@ def: InstRW<[BWWriteResGroup27], (instre
"(V?)ADDSSrr",
"(V?)ADDSUBPD(Y?)rr",
"(V?)ADDSUBPS(Y?)rr",
- "(V?)CMPPD(Y?)rri",
- "(V?)CMPPS(Y?)rri",
- "(V?)CMPSDrr",
- "(V?)CMPSSrr",
- "(V?)COMISDrr",
- "(V?)COMISSrr",
"(V?)CVTDQ2PS(Y?)rr",
"(V?)CVTPS2DQ(Y?)rr",
"(V?)CVTTPS2DQ(Y?)rr",
- "(V?)MAX(C?)PD(Y?)rr",
- "(V?)MAX(C?)PS(Y?)rr",
- "(V?)MAX(C?)SDrr",
- "(V?)MAX(C?)SSrr",
- "(V?)MIN(C?)PD(Y?)rr",
- "(V?)MIN(C?)PS(Y?)rr",
- "(V?)MIN(C?)SDrr",
- "(V?)MIN(C?)SSrr",
"(V?)SUBPD(Y?)rr",
"(V?)SUBPS(Y?)rr",
"(V?)SUBSDrr",
- "(V?)SUBSSrr",
- "(V?)UCOMISDrr",
- "(V?)UCOMISSrr")>;
+ "(V?)SUBSSrr")>;
def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> {
let Latency = 3;
@@ -1832,29 +1818,13 @@ def: InstRW<[BWWriteResGroup91], (instre
"(V?)ADDSSrm",
"(V?)ADDSUBPDrm",
"(V?)ADDSUBPSrm",
- "(V?)CMPPDrmi",
- "(V?)CMPPSrmi",
- "(V?)CMPSDrm",
- "(V?)CMPSSrm",
- "(V?)COMISDrm",
- "(V?)COMISSrm",
"(V?)CVTDQ2PSrm",
"(V?)CVTPS2DQrm",
"(V?)CVTTPS2DQrm",
- "(V?)MAX(C?)PDrm",
- "(V?)MAX(C?)PSrm",
- "(V?)MAX(C?)SDrm",
- "(V?)MAX(C?)SSrm",
- "(V?)MIN(C?)PDrm",
- "(V?)MIN(C?)PSrm",
- "(V?)MIN(C?)SDrm",
- "(V?)MIN(C?)SSrm",
"(V?)SUBPDrm",
"(V?)SUBPSrm",
"(V?)SUBSDrm",
- "(V?)SUBSSrm",
- "(V?)UCOMISDrm",
- "(V?)UCOMISSrm")>;
+ "(V?)SUBSSrm")>;
def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> {
let Latency = 8;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Tue Apr 17 00:22:44 2018
@@ -149,6 +149,8 @@ def : WriteRes<WriteFLoad, [HWPo
def : WriteRes<WriteFMove, [HWPort5]>;
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3>;
+defm : HWWriteResPair<WriteFCmp, [HWPort1], 3, [1], 1, 6>;
+defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMul, [HWPort0], 5>;
defm : HWWriteResPair<WriteFDiv, [HWPort0], 12>; // 10-14 cycles.
defm : HWWriteResPair<WriteFRcp, [HWPort0], 5>;
@@ -1041,16 +1043,12 @@ def: InstRW<[HWWriteResGroup12], (instre
"(V?)ADDSSrm",
"(V?)CMPSDrm",
"(V?)CMPSSrm",
- "(V?)COMISDrm",
- "(V?)COMISSrm",
"(V?)MAX(C?)SDrm",
"(V?)MAX(C?)SSrm",
"(V?)MIN(C?)SDrm",
"(V?)MIN(C?)SSrm",
"(V?)SUBSDrm",
- "(V?)SUBSSrm",
- "(V?)UCOMISDrm",
- "(V?)UCOMISSrm")>;
+ "(V?)SUBSSrm")>;
def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 7;
@@ -1730,29 +1728,13 @@ def: InstRW<[HWWriteResGroup50], (instre
"(V?)ADDSSrr",
"(V?)ADDSUBPD(Y?)rr",
"(V?)ADDSUBPS(Y?)rr",
- "(V?)CMPPD(Y?)rri",
- "(V?)CMPPS(Y?)rri",
- "(V?)CMPSDrr",
- "(V?)CMPSSrr",
- "(V?)COMISDrr",
- "(V?)COMISSrr",
"(V?)CVTDQ2PS(Y?)rr",
"(V?)CVTPS2DQ(Y?)rr",
"(V?)CVTTPS2DQ(Y?)rr",
- "(V?)MAX(C?)PD(Y?)rr",
- "(V?)MAX(C?)PS(Y?)rr",
- "(V?)MAX(C?)SDrr",
- "(V?)MAX(C?)SSrr",
- "(V?)MIN(C?)PD(Y?)rr",
- "(V?)MIN(C?)PS(Y?)rr",
- "(V?)MIN(C?)SDrr",
- "(V?)MIN(C?)SSrr",
"(V?)SUBPD(Y?)rr",
"(V?)SUBPS(Y?)rr",
"(V?)SUBSDrr",
- "(V?)SUBSSrr",
- "(V?)UCOMISDrr",
- "(V?)UCOMISSrr")>;
+ "(V?)SUBSSrr")>;
def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> {
let Latency = 3;
@@ -1804,15 +1786,9 @@ def: InstRW<[HWWriteResGroup52], (instre
"(V?)ADDPSrm",
"(V?)ADDSUBPDrm",
"(V?)ADDSUBPSrm",
- "(V?)CMPPDrmi",
- "(V?)CMPPSrmi",
"(V?)CVTDQ2PSrm",
"(V?)CVTPS2DQrm",
"(V?)CVTTPS2DQrm",
- "(V?)MAX(C?)PDrm",
- "(V?)MAX(C?)PSrm",
- "(V?)MIN(C?)PDrm",
- "(V?)MIN(C?)PSrm",
"(V?)SUBPDrm",
"(V?)SUBPSrm")>;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Tue Apr 17 00:22:44 2018
@@ -139,6 +139,8 @@ def : WriteRes<WriteFLoad, [SBPo
def : WriteRes<WriteFMove, [SBPort5]>;
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3>;
+defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>;
+defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
defm : SBWriteResPair<WriteFMul, [SBPort0], 5>;
defm : SBWriteResPair<WriteFDiv, [SBPort0], 24>;
defm : SBWriteResPair<WriteFRcp, [SBPort0], 5>;
@@ -685,21 +687,9 @@ def: InstRW<[SBWriteResGroup21], (instre
"(V?)ADDSSrr",
"(V?)ADDSUBPD(Y?)rr",
"(V?)ADDSUBPS(Y?)rr",
- "(V?)CMPPD(Y?)rri",
- "(V?)CMPPS(Y?)rri",
- "(V?)CMPSDrr",
- "(V?)CMPSSrr",
"(V?)CVTDQ2PS(Y?)rr",
"(V?)CVTPS2DQ(Y?)rr",
"(V?)CVTTPS2DQ(Y?)rr",
- "(V?)MAX(C?)PD(Y?)rr",
- "(V?)MAX(C?)PS(Y?)rr",
- "(V?)MAX(C?)SDrr",
- "(V?)MAX(C?)SSrr",
- "(V?)MIN(C?)PD(Y?)rr",
- "(V?)MIN(C?)PS(Y?)rr",
- "(V?)MIN(C?)SDrr",
- "(V?)MIN(C?)SSrr",
"(V?)ROUNDPD(Y?)r",
"(V?)ROUNDPS(Y?)r",
"(V?)ROUNDSDr",
@@ -1562,23 +1552,11 @@ def: InstRW<[SBWriteResGroup90], (instre
"(V?)ADDSSrm",
"(V?)ADDSUBPDrm",
"(V?)ADDSUBPSrm",
- "(V?)CMPPDrmi",
- "(V?)CMPPSrmi",
- "(V?)CMPSDrm",
- "(V?)CMPSSrm",
"(V?)CVTDQ2PSrm",
"(V?)CVTPS2DQrm",
"(V?)CVTSI642SDrm",
"(V?)CVTSI2SDrm",
"(V?)CVTTPS2DQrm",
- "(V?)MAX(C?)PDrm",
- "(V?)MAX(C?)PSrm",
- "(V?)MAX(C?)SDrm",
- "(V?)MAX(C?)SSrm",
- "(V?)MIN(C?)PDrm",
- "(V?)MIN(C?)PSrm",
- "(V?)MIN(C?)SDrm",
- "(V?)MIN(C?)SSrm",
"(V?)ROUNDPDm",
"(V?)ROUNDPSm",
"(V?)ROUNDSDm",
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Tue Apr 17 00:22:44 2018
@@ -151,7 +151,9 @@ def : WriteRes<WriteFLoad, [SKL
def : WriteRes<WriteFStore, [SKLPort237, SKLPort4]>;
def : WriteRes<WriteFMove, [SKLPort015]>;
-defm : SKLWriteResPair<WriteFAdd, [SKLPort1], 3>; // Floating point add/sub/compare.
+defm : SKLWriteResPair<WriteFAdd, [SKLPort1], 3>; // Floating point add/sub.
+defm : SKLWriteResPair<WriteFCmp, [SKLPort01], 4, [1], 1, 6>; // Floating point compare.
+defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
defm : SKLWriteResPair<WriteFMul, [SKLPort0], 5>; // Floating point multiplication.
defm : SKLWriteResPair<WriteFDiv, [SKLPort0], 12>; // 10-14 cycles. // Floating point division.
defm : SKLWriteResPair<WriteFSqrt, [SKLPort0], 15>; // Floating point square root.
@@ -672,14 +674,10 @@ def SKLWriteResGroup12 : SchedWriteRes<[
}
def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64from64rr",
"MMX_MOVD64grr",
- "(V?)COMISDrr",
- "(V?)COMISSrr",
"(V?)MOVPDI2DIrr",
"(V?)MOVPQIto64rr",
"VTESTPD(Y?)rr",
- "VTESTPS(Y?)rr",
- "(V?)UCOMISDrr",
- "(V?)UCOMISSrr")>;
+ "VTESTPS(Y?)rr")>;
def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> {
let Latency = 2;
@@ -1067,21 +1065,9 @@ def: InstRW<[SKLWriteResGroup48], (instr
"(V?)ADDSSrr",
"(V?)ADDSUBPD(Y?)rr",
"(V?)ADDSUBPS(Y?)rr",
- "(V?)CMPPD(Y?)rri",
- "(V?)CMPPS(Y?)rri",
- "(V?)CMPSDrr",
- "(V?)CMPSSrr",
"(V?)CVTDQ2PS(Y?)rr",
"(V?)CVTPS2DQ(Y?)rr",
"(V?)CVTTPS2DQ(Y?)rr",
- "(V?)MAX(C?)PD(Y?)rr",
- "(V?)MAX(C?)PS(Y?)rr",
- "(V?)MAX(C?)SDrr",
- "(V?)MAX(C?)SSrr",
- "(V?)MIN(C?)PD(Y?)rr",
- "(V?)MIN(C?)PS(Y?)rr",
- "(V?)MIN(C?)SDrr",
- "(V?)MIN(C?)SSrr",
"(V?)MULPD(Y?)rr",
"(V?)MULPS(Y?)rr",
"(V?)MULSDrr",
@@ -1547,16 +1533,6 @@ def SKLWriteResGroup86 : SchedWriteRes<[
}
def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>;
-def SKLWriteResGroup87 : SchedWriteRes<[SKLPort0,SKLPort23]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup87], (instregex "(V?)COMISDrm",
- "(V?)COMISSrm",
- "(V?)UCOMISDrm",
- "(V?)UCOMISSrm")>;
-
def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 7;
let NumMicroOps = 2;
@@ -2196,17 +2172,11 @@ def: InstRW<[SKLWriteResGroup134], (inst
"(V?)ADDPSrm",
"(V?)ADDSUBPDrm",
"(V?)ADDSUBPSrm",
- "(V?)CMPPDrmi",
- "(V?)CMPPSrmi",
"(V?)CVTDQ2PSrm",
"(V?)CVTPH2PSYrm",
"(V?)CVTPS2DQrm",
"(V?)CVTSS2SDrm",
"(V?)CVTTPS2DQrm",
- "(V?)MAX(C?)PDrm",
- "(V?)MAX(C?)PSrm",
- "(V?)MIN(C?)PDrm",
- "(V?)MIN(C?)PSrm",
"(V?)MULPDrm",
"(V?)MULPSrm",
"(V?)PHMINPOSUWrm",
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Tue Apr 17 00:22:44 2018
@@ -151,7 +151,9 @@ def : WriteRes<WriteFLoad, [SKX
def : WriteRes<WriteFStore, [SKXPort237, SKXPort4]>;
def : WriteRes<WriteFMove, [SKXPort015]>;
-defm : SKXWriteResPair<WriteFAdd, [SKXPort1], 3>; // Floating point add/sub/compare.
+defm : SKXWriteResPair<WriteFAdd, [SKXPort1], 3>; // Floating point add/sub.
+defm : SKXWriteResPair<WriteFCmp, [SKXPort015], 4, [1], 1, 6>; // Floating point compare.
+defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
defm : SKXWriteResPair<WriteFMul, [SKXPort0], 5>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFDiv, [SKXPort0], 12>; // 10-14 cycles. // Floating point division.
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0], 15>; // Floating point square root.
@@ -1406,18 +1408,10 @@ def SKXWriteResGroup12 : SchedWriteRes<[
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup12], (instregex "COMISDrr",
- "COMISSrr",
- "MMX_MOVD64from64rr",
+def: InstRW<[SKXWriteResGroup12], (instregex "MMX_MOVD64from64rr",
"MMX_MOVD64grr",
"MOVPDI2DIrr",
"MOVPQIto64rr",
- "UCOMISDrr",
- "UCOMISSrr",
- "VCOMISDZrr(b?)",
- "VCOMISDrr",
- "VCOMISSZrr(b?)",
- "VCOMISSrr",
"VMOVPDI2DIZrr",
"VMOVPDI2DIrr",
"VMOVPQIto64Zrr",
@@ -1425,11 +1419,7 @@ def: InstRW<[SKXWriteResGroup12], (instr
"VTESTPDYrr",
"VTESTPDrr",
"VTESTPSYrr",
- "VTESTPSrr",
- "VUCOMISDZrr(b?)",
- "VUCOMISDrr",
- "VUCOMISSZrr(b?)",
- "VUCOMISSrr")>;
+ "VTESTPSrr")>;
def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
let Latency = 2;
@@ -2162,21 +2152,9 @@ def: InstRW<[SKXWriteResGroup50], (instr
"ADDSSrr",
"ADDSUBPDrr",
"ADDSUBPSrr",
- "CMPPDrri",
- "CMPPSrri",
- "CMPSDrr",
- "CMPSSrr",
"CVTDQ2PSrr",
"CVTPS2DQrr",
"CVTTPS2DQrr",
- "MAX(C?)PDrr",
- "MAX(C?)PSrr",
- "MAX(C?)SDrr",
- "MAX(C?)SSrr",
- "MIN(C?)PDrr",
- "MIN(C?)PSrr",
- "MIN(C?)SDrr",
- "MIN(C?)SSrr",
"MULPDrr",
"MULPSrr",
"MULSDrr",
@@ -2212,12 +2190,6 @@ def: InstRW<[SKXWriteResGroup50], (instr
"VADDSUBPDrr",
"VADDSUBPSYrr",
"VADDSUBPSrr",
- "VCMPPDYrri",
- "VCMPPDrri",
- "VCMPPSYrri",
- "VCMPPSrri",
- "VCMPSDrr",
- "VCMPSSrr",
"VCVTDQ2PSYrr",
"VCVTDQ2PSZ128rr",
"VCVTDQ2PSZ256rr",
@@ -2284,34 +2256,6 @@ def: InstRW<[SKXWriteResGroup50], (instr
"VGETMANTPSZrri",
"VGETMANTSDZ128rri",
"VGETMANTSSZ128rri",
- "VMAX(C?)PDYrr",
- "VMAX(C?)PDZ128rr",
- "VMAX(C?)PDZ256rr",
- "VMAX(C?)PDZrr",
- "VMAX(C?)PDrr",
- "VMAX(C?)PSYrr",
- "VMAX(C?)PSZ128rr",
- "VMAX(C?)PSZ256rr",
- "VMAX(C?)PSZrr",
- "VMAX(C?)PSrr",
- "VMAX(C?)SDZrr",
- "VMAX(C?)SDrr",
- "VMAX(C?)SSZrr",
- "VMAX(C?)SSrr",
- "VMIN(C?)PDYrr",
- "VMIN(C?)PDZ128rr",
- "VMIN(C?)PDZ256rr",
- "VMIN(C?)PDZrr",
- "VMIN(C?)PDrr",
- "VMIN(C?)PSYrr",
- "VMIN(C?)PSZ128rr",
- "VMIN(C?)PSZ256rr",
- "VMIN(C?)PSZrr",
- "VMIN(C?)PSrr",
- "VMIN(C?)SDZrr",
- "VMIN(C?)SDrr",
- "VMIN(C?)SSZrr",
- "VMIN(C?)SSrr",
"VMULPDYrr",
"VMULPDZ128rr",
"VMULPDZ256rr",
@@ -3145,24 +3089,6 @@ def SKXWriteResGroup90 : SchedWriteRes<[
}
def: InstRW<[SKXWriteResGroup90], (instregex "VCVTDQ2PDYrr")>;
-def SKXWriteResGroup91 : SchedWriteRes<[SKXPort0,SKXPort23]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup91], (instregex "COMISDrm",
- "COMISSrm",
- "UCOMISDrm",
- "UCOMISSrm",
- "VCOMISDZrm(b?)",
- "VCOMISDrm",
- "VCOMISSZrm(b?)",
- "VCOMISSrm",
- "VUCOMISDZrm(b?)",
- "VUCOMISDrm",
- "VUCOMISSZrm(b?)",
- "VUCOMISSrm")>;
-
def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 7;
let NumMicroOps = 2;
@@ -4744,16 +4670,10 @@ def: InstRW<[SKXWriteResGroup149], (inst
"ADDPSrm",
"ADDSUBPDrm",
"ADDSUBPSrm",
- "CMPPDrmi",
- "CMPPSrmi",
"CVTDQ2PSrm",
"CVTPS2DQrm",
"CVTSS2SDrm",
"CVTTPS2DQrm",
- "MAX(C?)PDrm",
- "MAX(C?)PSrm",
- "MIN(C?)PDrm",
- "MIN(C?)PSrm",
"MULPDrm",
"MULPSrm",
"PHMINPOSUWrm",
@@ -4775,8 +4695,6 @@ def: InstRW<[SKXWriteResGroup149], (inst
"VADDSSZrm",
"VADDSUBPDrm",
"VADDSUBPSrm",
- "VCMPPDrmi",
- "VCMPPSrmi",
"VCVTDQ2PDZ128rm(b?)",
"VCVTDQ2PSZ128rm(b?)",
"VCVTDQ2PSrm",
@@ -4817,18 +4735,6 @@ def: InstRW<[SKXWriteResGroup149], (inst
"VGETMANTPSZ128rm(b?)i",
"VGETMANTSDZ128rmi(b?)",
"VGETMANTSSZ128rmi(b?)",
- "VMAX(C?)PDZ128rm(b?)",
- "VMAX(C?)PDrm",
- "VMAX(C?)PSZ128rm(b?)",
- "VMAX(C?)PSrm",
- "VMAX(C?)SDZrm",
- "VMAX(C?)SSZrm",
- "VMIN(C?)PDZ128rm(b?)",
- "VMIN(C?)PDrm",
- "VMIN(C?)PSZ128rm(b?)",
- "VMIN(C?)PSrm",
- "VMIN(C?)SDZrm",
- "VMIN(C?)SSZrm",
"VMULPDZ128rm(b?)",
"VMULPDrm",
"VMULPSZ128rm(b?)",
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Tue Apr 17 00:22:44 2018
@@ -78,7 +78,9 @@ defm WriteJump : X86SchedWritePair;
def WriteFLoad : SchedWrite;
def WriteFStore : SchedWrite;
def WriteFMove : SchedWrite;
-defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
+defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
+defm WriteFCmp : X86SchedWritePair; // Floating point compare.
+defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
defm WriteFDiv : X86SchedWritePair; // Floating point division.
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Tue Apr 17 00:22:44 2018
@@ -203,6 +203,8 @@ def : WriteRes<WriteFStore, [AtomPort0]
def : WriteRes<WriteFMove, [AtomPort01]>;
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Tue Apr 17 00:22:44 2018
@@ -294,6 +294,8 @@ def : WriteRes<WriteFStore, [JSA
def : WriteRes<WriteFMove, [JFPU01, JFPX]>;
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
+defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>;
+defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMA, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
@@ -704,28 +706,6 @@ def JWriteVMOVNTPYSt: SchedWriteRes<[JFP
}
def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTDQYmr, VMOVNTPDYmr, VMOVNTPSYmr)>;
-def JWriteFComi : SchedWriteRes<[JFPU0, JFPA, JALU0]> {
- let Latency = 3;
-}
-def : InstRW<[JWriteFComi], (instregex "(V)?(U)?COMIS(D|S)rr")>;
-
-def JWriteFComiLd : SchedWriteRes<[JLAGU, JFPU0, JFPA, JALU0]> {
- let Latency = 8;
-}
-def : InstRW<[JWriteFComiLd], (instregex "(V)?(U)?COMIS(D|S)rm")>;
-
-def JWriteFCmp: SchedWriteRes<[JFPU0, JFPA]> {
- let Latency = 2;
-}
-def : InstRW<[JWriteFCmp], (instregex "(V)?M(AX|IN)(P|S)(D|S)rr",
- "(V)?CMPP(S|D)rri", "(V)?CMPS(S|D)rr")>;
-
-def JWriteFCmpLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> {
- let Latency = 7;
-}
-def : InstRW<[JWriteFCmpLd], (instregex "(V)?M(AX|IN)(P|S)(D|S)rm",
- "(V)?CMPP(S|D)rmi", "(V)?CMPS(S|D)rm")>;
-
def JWriteFCmpY: SchedWriteRes<[JFPU0, JFPA]> {
let Latency = 2;
let ResourceCycles = [2, 2];
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Tue Apr 17 00:22:44 2018
@@ -125,6 +125,8 @@ def : WriteRes<WriteFLoad, [SLM_
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Tue Apr 17 00:22:44 2018
@@ -193,6 +193,8 @@ def : WriteRes<WriteFLoad,
defm : ZnWriteResFpuPair<WriteFHAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;
Modified: llvm/trunk/test/CodeGen/X86/x87-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x87-schedule.ll?rev=330179&r1=330178&r2=330179&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x87-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x87-schedule.ll Tue Apr 17 00:22:44 2018
@@ -1187,16 +1187,16 @@ define void @test_fcomi_fcomip() optsize
; SKYLAKE-LABEL: test_fcomi_fcomip:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fcomi %st(3) # sched: [3:1.00]
-; SKYLAKE-NEXT: fcompi %st(3) # sched: [3:1.00]
+; SKYLAKE-NEXT: fcomi %st(3) # sched: [2:1.00]
+; SKYLAKE-NEXT: fcompi %st(3) # sched: [2:1.00]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
; SKX-LABEL: test_fcomi_fcomip:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: fcomi %st(3) # sched: [3:1.00]
-; SKX-NEXT: fcompi %st(3) # sched: [3:1.00]
+; SKX-NEXT: fcomi %st(3) # sched: [2:1.00]
+; SKX-NEXT: fcompi %st(3) # sched: [2:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
@@ -5073,14 +5073,14 @@ define void @test_ftst() optsize {
; SKYLAKE-LABEL: test_ftst:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: ftst # sched: [3:1.00]
+; SKYLAKE-NEXT: ftst # sched: [2:1.00]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
; SKX-LABEL: test_ftst:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: ftst # sched: [3:1.00]
+; SKX-NEXT: ftst # sched: [2:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
@@ -5175,7 +5175,7 @@ define void @test_fucom_fucomp_fucompp()
; SKYLAKE-NEXT: fucom %st(3) # sched: [1:1.00]
; SKYLAKE-NEXT: fucomp %st(1) # sched: [1:1.00]
; SKYLAKE-NEXT: fucomp %st(3) # sched: [1:1.00]
-; SKYLAKE-NEXT: fucompp # sched: [3:1.00]
+; SKYLAKE-NEXT: fucompp # sched: [2:1.00]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
@@ -5186,7 +5186,7 @@ define void @test_fucom_fucomp_fucompp()
; SKX-NEXT: fucom %st(3) # sched: [1:1.00]
; SKX-NEXT: fucomp %st(1) # sched: [1:1.00]
; SKX-NEXT: fucomp %st(3) # sched: [1:1.00]
-; SKX-NEXT: fucompp # sched: [3:1.00]
+; SKX-NEXT: fucompp # sched: [2:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
@@ -5267,16 +5267,16 @@ define void @test_fucomi_fucomip() optsi
; SKYLAKE-LABEL: test_fucomi_fucomip:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fucomi %st(3) # sched: [3:1.00]
-; SKYLAKE-NEXT: fucompi %st(3) # sched: [3:1.00]
+; SKYLAKE-NEXT: fucomi %st(3) # sched: [2:1.00]
+; SKYLAKE-NEXT: fucompi %st(3) # sched: [2:1.00]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
; SKX-LABEL: test_fucomi_fucomip:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: fucomi %st(3) # sched: [3:1.00]
-; SKX-NEXT: fucompi %st(3) # sched: [3:1.00]
+; SKX-NEXT: fucomi %st(3) # sched: [2:1.00]
+; SKX-NEXT: fucompi %st(3) # sched: [2:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
More information about the llvm-commits mailing list