[llvm] r319338 - [X86][AVX512] Tag RCP/RSQRT/GETEXP instructions scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 29 11:19:59 PST 2017
Author: rksimon
Date: Wed Nov 29 11:19:59 2017
New Revision: 319338
URL: http://llvm.org/viewvc/llvm-project?rev=319338&view=rev
Log:
[X86][AVX512] Tag RCP/RSQRT/GETEXP instructions scheduler classes
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319338&r1=319337&r2=319338&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Nov 29 11:19:59 2017
@@ -4549,7 +4549,7 @@ multiclass avx512_fp_scalar<bits<8> opc,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1, _.RC:$src2,
(i32 FROUND_CURRENT))),
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
@@ -4557,20 +4557,21 @@ multiclass avx512_fp_scalar<bits<8> opc,
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))),
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
- itins.rr> {
+ itins.rr>, Sched<[itins.Sched]> {
let isCommutable = IsCommutable;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
- (_.ScalarLdFrag addr:$src2)))], itins.rm>;
+ (_.ScalarLdFrag addr:$src2)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
}
@@ -4583,7 +4584,7 @@ multiclass avx512_fp_scalar_round<bits<8
"$rc, $src2, $src1", "$src1, $src2, $rc",
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$rc)), itins.rr, IsCommutable>,
- EVEX_B, EVEX_RC;
+ EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode VecNode, SDNode SaeNode,
@@ -4593,35 +4594,36 @@ multiclass avx512_fp_scalar_sae<bits<8>
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1, _.RC:$src2)),
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2)),
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
- itins.rr> {
+ itins.rr>, Sched<[itins.Sched]> {
let isCommutable = IsCommutable;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
- (_.ScalarLdFrag addr:$src2)))], itins.rm>;
+ (_.ScalarLdFrag addr:$src2)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 FROUND_NO_EXC))>, EVEX_B;
+ (i32 FROUND_NO_EXC))>, EVEX_B, Sched<[itins.Sched]>;
}
}
@@ -7364,32 +7366,34 @@ let Defs = [EFLAGS], Predicates = [HasAV
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
+ OpndItins itins, X86VectorVTInfo _> {
let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
+ EVEX_4V, Sched<[itins.Sched]>;
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
- _.ScalarIntMemCPat:$src2)>, EVEX_4V;
+ _.ScalarIntMemCPat:$src2), itins.rm>, EVEX_4V,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
-defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, f32x_info>,
+defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
-defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, f64x_info>,
+defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>,
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
-defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, f32x_info>,
+defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
-defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, f64x_info>,
+defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>,
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
+ OpndItins itins, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
@@ -7407,35 +7411,36 @@ multiclass avx512_fp14_p<bits<8> opc, st
}
}
-multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
- defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, v16f32_info>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, v8f64_info>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins> {
+ defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, itins.s,
+ v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, itins.d,
+ v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
- OpNode, v4f32x_info>,
+ OpNode, itins.s, v4f32x_info>,
EVEX_V128, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
- OpNode, v8f32x_info>,
+ OpNode, itins.s, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
- OpNode, v2f64x_info>,
+ OpNode, itins.d, v2f64x_info>,
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
- OpNode, v4f64x_info>,
+ OpNode, itins.d, v4f64x_info>,
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
-defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14>;
-defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14>;
+defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>;
+defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>;
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
- SDNode OpNode> {
+ SDNode OpNode, OpndItins itins> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
@@ -7457,23 +7462,27 @@ multiclass avx512_fp28_s<bits<8> opc, st
}
}
-multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
- defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode>,
+multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins> {
+ defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, itins.s>,
EVEX_CD8<32, CD8VT1>;
- defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode>,
+ defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, itins.d>,
EVEX_CD8<64, CD8VT1>, VEX_W;
}
let Predicates = [HasERI] in {
- defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
- defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
+ defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>,
+ T8PD, EVEX_4V;
+ defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
+ T8PD, EVEX_4V;
}
-defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
+defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>,
+ T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- SDNode OpNode> {
+ SDNode OpNode, OpndItins itins> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
@@ -7494,7 +7503,7 @@ multiclass avx512_fp28_p<bits<8> opc, st
}
}
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- SDNode OpNode> {
+ SDNode OpNode, OpndItins itins> {
let ExeDomain = _.ExeDomain in
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
@@ -7502,37 +7511,39 @@ multiclass avx512_fp28_p_round<bits<8> o
(OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
}
-multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
- defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
- avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
+multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins> {
+ defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
+ avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
- avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
+ defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
+ avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
+ SDNode OpNode, SizeItins itins> {
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
- defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode>,
+ defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, itins.s>,
EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
- defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode>,
+ defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, itins.s>,
EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
- defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode>,
+ defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, itins.d>,
EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
- defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode>,
+ defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, itins.d>,
EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
}
}
let Predicates = [HasERI] in {
- defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX;
- defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX;
- defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX;
-}
-defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
- avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX;
+ defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
+ defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
+ defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
+}
+defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,
+ avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
+ SSE_ALU_ITINS_P>, EVEX;
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, OpndItins itins,
X86VectorVTInfo _>{
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=319338&r1=319337&r2=319338&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Nov 29 11:19:59 2017
@@ -3040,6 +3040,14 @@ def SSE_RSQRTSS : OpndItins<
>;
}
+def SSE_RSQRT_P : SizeItins<
+ SSE_RSQRTPS, SSE_RSQRTPS
+>;
+
+def SSE_RSQRT_S : SizeItins<
+ SSE_RSQRTSS, SSE_RSQRTSS
+>;
+
let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
@@ -3050,6 +3058,14 @@ def SSE_RCPS : OpndItins<
>;
}
+def SSE_RCP_P : SizeItins<
+ SSE_RCPP, SSE_RCPP
+>;
+
+def SSE_RCP_S : SizeItins<
+ SSE_RCPS, SSE_RCPS
+>;
+
/// sse_fp_unop_s - SSE1 unops in scalar form
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll?rev=319338&r1=319337&r2=319338&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll Wed Nov 29 11:19:59 2017
@@ -380,12 +380,12 @@ define float @f32_two_step_2(float %x) #
;
; SKX-LABEL: f32_two_step_2:
; SKX: # BB#0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.33]
-; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [4:0.33]
-; SKX-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm2 # sched: [4:1.00]
+; SKX-NEXT: vmovaps %xmm2, %xmm3 # sched: [1:1.00]
+; SKX-NEXT: vfnmadd213ss %xmm1, %xmm0, %xmm3 # sched: [4:0.33]
+; SKX-NEXT: vfmadd132ss %xmm2, %xmm2, %xmm3 # sched: [4:0.33]
+; SKX-NEXT: vfnmadd213ss %xmm1, %xmm3, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
More information about the llvm-commits
mailing list