[llvm] r332090 - [X86] Added scheduler helper classes to split move/load/store by size
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri May 11 05:46:54 PDT 2018
Author: rksimon
Date: Fri May 11 05:46:54 2018
New Revision: 332090
URL: http://llvm.org/viewvc/llvm-project?rev=332090&view=rev
Log:
[X86] Added scheduler helper classes to split move/load/store by size
Nothing uses this yet, but this will allow us to specialize MMX/XMM/YMM/ZMM vector moves.
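
As a sketch of the intent only (not part of this patch): once instructions reference the new wrappers, a follow-up could redefine the per-width wrappers added below in X86Schedule.td with distinct writes. The names WriteFMoveY/WriteFLoadY/WriteFStoreY are hypothetical placeholders, not definitions added by this commit:

  // Hypothetical YMM-specific writes that a later patch could introduce.
  def WriteFMoveY  : SchedWrite;
  def WriteFLoadY  : SchedWrite;
  def WriteFStoreY : SchedWrite;

  // WriteFMoveLSY would then stop aliasing the XMM/scalar triple:
  def WriteFMoveLSY
    : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;

With that in place, Sched<[SchedWriteFMoveLS.YMM.RM]> on a 256-bit load form would resolve to WriteFLoadY instead of the shared WriteFLoad.
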
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrMMX.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86Schedule.td
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=332090&r1=332089&r2=332090&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri May 11 05:46:54 2018
@@ -878,6 +878,7 @@ multiclass vextract_for_type<ValueType E
}
}
+// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
@@ -3167,16 +3168,14 @@ defm : operation_subvector_mask_lowering
// AVX-512 - Aligned and unaligned load and store
//
-
multiclass avx512_load<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
- SchedWrite SchedRR, SchedWrite SchedRM,
- bit NoRMPattern = 0,
+ X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
SDPatternOperator SelectOprr = vselect> {
let hasSideEffects = 0 in {
def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
- _.ExeDomain>, EVEX, Sched<[SchedRR]>;
+ _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
@@ -3184,7 +3183,7 @@ multiclass avx512_load<bits<8> opc, stri
[(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
(_.VT _.RC:$src),
_.ImmAllZerosV)))], _.ExeDomain>,
- EVEX, EVEX_KZ, Sched<[SchedRR]>;
+ EVEX, EVEX_KZ, Sched<[Sched.RR]>;
let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
@@ -3192,7 +3191,7 @@ multiclass avx512_load<bits<8> opc, stri
!if(NoRMPattern, [],
[(set _.RC:$dst,
(_.VT (bitconvert (ld_frag addr:$src))))]),
- _.ExeDomain>, EVEX, Sched<[SchedRM]>;
+ _.ExeDomain>, EVEX, Sched<[Sched.RM]>;
let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
@@ -3202,7 +3201,7 @@ multiclass avx512_load<bits<8> opc, stri
[(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
(_.VT _.RC:$src1),
(_.VT _.RC:$src0))))], _.ExeDomain>,
- EVEX, EVEX_K, Sched<[SchedRR]>;
+ EVEX, EVEX_K, Sched<[Sched.RR]>;
def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
@@ -3211,7 +3210,7 @@ multiclass avx512_load<bits<8> opc, stri
(vselect _.KRCWM:$mask,
(_.VT (bitconvert (ld_frag addr:$src1))),
(_.VT _.RC:$src0))))], _.ExeDomain>,
- EVEX, EVEX_K, Sched<[SchedRM]>;
+ EVEX, EVEX_K, Sched<[Sched.RM]>;
}
def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src),
@@ -3219,7 +3218,7 @@ multiclass avx512_load<bits<8> opc, stri
"${dst} {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
(_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
- _.ExeDomain>, EVEX, EVEX_KZ, Sched<[SchedRM]>;
+ _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
}
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
(!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
@@ -3233,65 +3232,65 @@ multiclass avx512_load<bits<8> opc, stri
}
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _,
- Predicate prd, SchedWrite SchedRR,
- SchedWrite SchedRM, bit NoRMPattern = 0> {
+ AVX512VLVectorVTInfo _, Predicate prd,
+ X86SchedWriteMoveLSWidths Sched,
+ bit NoRMPattern = 0> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, _.info512,
_.info512.AlignedLdFrag, masked_load_aligned512,
- SchedRR, SchedRM, NoRMPattern>, EVEX_V512;
+ Sched.ZMM, NoRMPattern>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, _.info256,
_.info256.AlignedLdFrag, masked_load_aligned256,
- SchedRR, SchedRM, NoRMPattern>, EVEX_V256;
+ Sched.YMM, NoRMPattern>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, _.info128,
_.info128.AlignedLdFrag, masked_load_aligned128,
- SchedRR, SchedRM, NoRMPattern>, EVEX_V128;
+ Sched.XMM, NoRMPattern>, EVEX_V128;
}
}
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _,
- Predicate prd, SchedWrite SchedRR,
- SchedWrite SchedRM, bit NoRMPattern = 0,
- SDPatternOperator SelectOprr = vselect> {
+ AVX512VLVectorVTInfo _, Predicate prd,
+ X86SchedWriteMoveLSWidths Sched,
+ bit NoRMPattern = 0,
+ SDPatternOperator SelectOprr = vselect> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
- masked_load_unaligned, SchedRR, SchedRM, NoRMPattern,
+ masked_load_unaligned, Sched.ZMM, NoRMPattern,
SelectOprr>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
- masked_load_unaligned, SchedRR, SchedRM, NoRMPattern,
+ masked_load_unaligned, Sched.YMM, NoRMPattern,
SelectOprr>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
- masked_load_unaligned, SchedRR, SchedRM, NoRMPattern,
+ masked_load_unaligned, Sched.XMM, NoRMPattern,
SelectOprr>, EVEX_V128;
}
}
multiclass avx512_store<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
- string Name, SchedWrite SchedRR, SchedWrite SchedMR,
+ string Name, X86SchedWriteMoveLS Sched,
bit NoMRPattern = 0> {
let hasSideEffects = 0 in {
def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
[], _.ExeDomain>, EVEX, FoldGenData<Name#rr>,
- Sched<[SchedRR]>;
+ Sched<[Sched.RR]>;
def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
"${dst} {${mask}}, $src}",
[], _.ExeDomain>, EVEX, EVEX_K,
- FoldGenData<Name#rrk>, Sched<[SchedRR]>;
+ FoldGenData<Name#rrk>, Sched<[Sched.RR]>;
def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
"${dst} {${mask}} {z}, $src}",
[], _.ExeDomain>, EVEX, EVEX_KZ,
- FoldGenData<Name#rrkz>, Sched<[SchedRR]>;
+ FoldGenData<Name#rrkz>, Sched<[Sched.RR]>;
}
let hasSideEffects = 0, mayStore = 1 in
@@ -3299,122 +3298,116 @@ multiclass avx512_store<bits<8> opc, str
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
!if(NoMRPattern, [],
[(st_frag (_.VT _.RC:$src), addr:$dst)]),
- _.ExeDomain>, EVEX, Sched<[SchedMR]>;
+ _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
def mrk : AVX512PI<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
- [], _.ExeDomain>, EVEX, EVEX_K, Sched<[SchedMR]>;
+ [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
(!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
_.KRCWM:$mask, _.RC:$src)>;
}
-
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd,
- string Name, SchedWrite SchedRR, SchedWrite SchedMR,
+ string Name, X86SchedWriteMoveLSWidths Sched,
bit NoMRPattern = 0> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, _.info512, store,
- masked_store_unaligned, Name#Z, SchedRR, SchedMR,
+ masked_store_unaligned, Name#Z, Sched.ZMM,
NoMRPattern>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, _.info256, store,
- masked_store_unaligned, Name#Z256, SchedRR,
- SchedMR, NoMRPattern>, EVEX_V256;
+ masked_store_unaligned, Name#Z256, Sched.YMM,
+ NoMRPattern>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, _.info128, store,
- masked_store_unaligned, Name#Z128, SchedRR,
- SchedMR, NoMRPattern>, EVEX_V128;
+ masked_store_unaligned, Name#Z128, Sched.XMM,
+ NoMRPattern>, EVEX_V128;
}
}
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _, Predicate prd,
- string Name, SchedWrite SchedRR,
- SchedWrite SchedMR, bit NoMRPattern = 0> {
+ AVX512VLVectorVTInfo _, Predicate prd,
+ string Name, X86SchedWriteMoveLSWidths Sched,
+ bit NoMRPattern = 0> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, _.info512, alignedstore,
- masked_store_aligned512, Name#Z, SchedRR, SchedMR,
+ masked_store_aligned512, Name#Z, Sched.ZMM,
NoMRPattern>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, _.info256, alignedstore,
- masked_store_aligned256, Name#Z256, SchedRR,
- SchedMR, NoMRPattern>, EVEX_V256;
+ masked_store_aligned256, Name#Z256, Sched.YMM,
+ NoMRPattern>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, _.info128, alignedstore,
- masked_store_aligned128, Name#Z128, SchedRR,
- SchedMR, NoMRPattern>, EVEX_V128;
+ masked_store_aligned128, Name#Z128, Sched.XMM,
+ NoMRPattern>, EVEX_V128;
}
}
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
- HasAVX512, WriteFMove, WriteFLoad>,
+ HasAVX512, SchedWriteFMoveLS>,
avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
- HasAVX512, "VMOVAPS", WriteFMove,
- WriteFStore>,
+ HasAVX512, "VMOVAPS",
+ SchedWriteFMoveLS>,
PS, EVEX_CD8<32, CD8VF>;
defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
- HasAVX512, WriteFMove, WriteFLoad>,
+ HasAVX512, SchedWriteFMoveLS>,
avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
- HasAVX512, "VMOVAPD", WriteFMove,
- WriteFStore>,
+ HasAVX512, "VMOVAPD",
+ SchedWriteFMoveLS>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
- WriteFMove, WriteFLoad, 0, null_frag>,
+ SchedWriteFMoveLS, 0, null_frag>,
avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
- "VMOVUPS", WriteFMove, WriteFStore>,
- PS, EVEX_CD8<32, CD8VF>;
+ "VMOVUPS", SchedWriteFMoveLS>,
+ PS, EVEX_CD8<32, CD8VF>;
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
- WriteFMove, WriteFLoad, 0, null_frag>,
+ SchedWriteFMoveLS, 0, null_frag>,
avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
- "VMOVUPD", WriteFMove, WriteFStore>,
+ "VMOVUPD", SchedWriteFMoveLS>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
- HasAVX512, WriteVecMove, WriteVecLoad,
- 1>,
+ HasAVX512, SchedWriteVecMoveLS, 1>,
avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
- HasAVX512, "VMOVDQA32", WriteVecMove,
- WriteVecStore, 1>,
+ HasAVX512, "VMOVDQA32",
+ SchedWriteVecMoveLS, 1>,
PD, EVEX_CD8<32, CD8VF>;
defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
- HasAVX512, WriteVecMove, WriteVecLoad>,
+ HasAVX512, SchedWriteVecMoveLS>,
avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
- HasAVX512, "VMOVDQA64", WriteVecMove,
- WriteVecStore>,
+ HasAVX512, "VMOVDQA64",
+ SchedWriteVecMoveLS>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
- WriteVecMove, WriteVecLoad, 1>,
- avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
- HasBWI, "VMOVDQU8", WriteVecMove,
- WriteVecStore, 1>,
+ SchedWriteVecMoveLS, 1>,
+ avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
+ "VMOVDQU8", SchedWriteVecMoveLS, 1>,
XD, EVEX_CD8<8, CD8VF>;
defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
- WriteVecMove, WriteVecLoad, 1>,
- avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
- HasBWI, "VMOVDQU16", WriteVecMove,
- WriteVecStore, 1>,
+ SchedWriteVecMoveLS, 1>,
+ avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
+ "VMOVDQU16", SchedWriteVecMoveLS, 1>,
XD, VEX_W, EVEX_CD8<16, CD8VF>;
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
- WriteVecMove, WriteVecLoad, 1, null_frag>,
- avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
- HasAVX512, "VMOVDQU32", WriteVecMove,
- WriteVecStore, 1>,
+ SchedWriteVecMoveLS, 1, null_frag>,
+ avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
+ "VMOVDQU32", SchedWriteVecMoveLS, 1>,
XS, EVEX_CD8<32, CD8VF>;
defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
- WriteVecMove, WriteVecLoad, 0, null_frag>,
- avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
- HasAVX512, "VMOVDQU64", WriteVecMove,
- WriteVecStore>,
+ SchedWriteVecMoveLS, 0, null_frag>,
+ avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
+ "VMOVDQU64", SchedWriteVecMoveLS>,
XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Special instructions to help with spilling when we don't have VLX. We need
@@ -4338,33 +4331,34 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8i64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
+
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
-let SchedRW = [WriteVecLoad] in {
- def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
- (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
- [], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
- EVEX_CD8<64, CD8VF>;
- let Predicates = [HasVLX] in {
- def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
- (ins i256mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}",
- [], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
- EVEX_CD8<64, CD8VF>;
-
- def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
- (ins i128mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}",
- [], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
- EVEX_CD8<64, CD8VF>;
- }
+def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
+ (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
+ EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+let Predicates = [HasVLX] in {
+ def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
+ (ins i256mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
+ EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
+
+ def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
+ (ins i128mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
+ EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86SchedWriteMoveLS Sched,
PatFrag st_frag = alignednontemporalstore> {
- let SchedRW = [WriteVecStore], AddedComplexity = 400 in
+ let SchedRW = [Sched.MR], AddedComplexity = 400 in
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(st_frag (_.VT _.RC:$src), addr:$dst)],
@@ -4372,19 +4366,23 @@ multiclass avx512_movnt<bits<8> opc, str
}
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo VTInfo> {
+ AVX512VLVectorVTInfo VTInfo,
+ X86SchedWriteMoveLSWidths Sched> {
let Predicates = [HasAVX512] in
- defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
+ defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
- defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
- defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
+ defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
+ defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
}
}
-defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
-defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
-defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
+defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
+ SchedWriteVecMoveLS>, PD;
+defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
+ SchedWriteFMoveLS>, PD, VEX_W;
+defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
+ SchedWriteFMoveLS>, PS;
let Predicates = [HasAVX512], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=332090&r1=332089&r2=332090&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Fri May 11 05:46:54 2018
@@ -242,7 +242,7 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)]>;
-let SchedRW = [WriteVecMove] in {
+let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=332090&r1=332089&r2=332090&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri May 11 05:46:54 2018
@@ -406,64 +406,66 @@ def : InstAlias<"vmovsd\t{$src2, $src1,
multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d> {
+ string asm, Domain d,
+ X86SchedWriteMoveLS sched> {
let hasSideEffects = 0 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
- Sched<[WriteFMove]>;
+ Sched<[sched.RR]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (ld_frag addr:$src))], d>,
- Sched<[WriteFLoad]>;
+ Sched<[sched.RM]>;
}
let Predicates = [HasAVX, NoVLX] in {
-defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
- "movaps", SSEPackedSingle>,
- PS, VEX, VEX_WIG;
-defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
- "movapd", SSEPackedDouble>,
- PD, VEX, VEX_WIG;
-defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
- "movups", SSEPackedSingle>,
- PS, VEX, VEX_WIG;
-defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
- "movupd", SSEPackedDouble>,
- PD, VEX, VEX_WIG;
-
-defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
- "movaps", SSEPackedSingle>,
- PS, VEX, VEX_L, VEX_WIG;
-defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
- "movapd", SSEPackedDouble>,
- PD, VEX, VEX_L, VEX_WIG;
-defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
- "movups", SSEPackedSingle>,
- PS, VEX, VEX_L, VEX_WIG;
-defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
- "movupd", SSEPackedDouble>,
- PD, VEX, VEX_L, VEX_WIG;
+defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
+ SSEPackedSingle, SchedWriteFMoveLS.XMM>,
+ PS, VEX, VEX_WIG;
+defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
+ SSEPackedDouble, SchedWriteFMoveLS.XMM>,
+ PD, VEX, VEX_WIG;
+defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
+ SSEPackedSingle, SchedWriteFMoveLS.XMM>,
+ PS, VEX, VEX_WIG;
+defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
+ SSEPackedDouble, SchedWriteFMoveLS.XMM>,
+ PD, VEX, VEX_WIG;
+
+defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
+ SSEPackedSingle, SchedWriteFMoveLS.YMM>,
+ PS, VEX, VEX_L, VEX_WIG;
+defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
+ SSEPackedDouble, SchedWriteFMoveLS.YMM>,
+ PD, VEX, VEX_L, VEX_WIG;
+defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
+ SSEPackedSingle, SchedWriteFMoveLS.YMM>,
+ PS, VEX, VEX_L, VEX_WIG;
+defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
+ SSEPackedDouble, SchedWriteFMoveLS.YMM>,
+ PD, VEX, VEX_L, VEX_WIG;
}
let Predicates = [UseSSE1] in {
-defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
- "movaps", SSEPackedSingle>,
- PS;
-defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
- "movups", SSEPackedSingle>,
- PS;
+defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
+ SSEPackedSingle, SchedWriteFMoveLS.XMM>,
+ PS;
+defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
+ SSEPackedSingle, SchedWriteFMoveLS.XMM>,
+ PS;
}
let Predicates = [UseSSE2] in {
-defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
- "movapd", SSEPackedDouble>,
- PD;
-defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
- "movupd", SSEPackedDouble>,
- PD;
+defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
+ SSEPackedDouble, SchedWriteFMoveLS.XMM>,
+ PD;
+defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
+ SSEPackedDouble, SchedWriteFMoveLS.XMM>,
+ PD;
}
-let SchedRW = [WriteFStore], Predicates = [HasAVX, NoVLX] in {
+let Predicates = [HasAVX, NoVLX] in {
+let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>,
@@ -480,6 +482,9 @@ def VMOVUPDmr : VPDI<0x11, MRMDestMem, (
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)]>,
VEX, VEX_WIG;
+} // SchedRW
+
+let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v8f32 VR256:$src), addr:$dst)]>,
@@ -497,10 +502,11 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem,
[(store (v4f64 VR256:$src), addr:$dst)]>,
VEX, VEX_L, VEX_WIG;
} // SchedRW
+} // Predicate
// For disassembler
-let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
- SchedRW = [WriteFMove] in {
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
+let SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", []>,
@@ -517,6 +523,9 @@ let isCodeGenOnly = 1, ForceDisassemble
(ins VR128:$src),
"movupd\t{$src, $dst|$dst, $src}", []>,
VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">;
+} // SchedRW
+
+let SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movaps\t{$src, $dst|$dst, $src}", []>,
@@ -533,7 +542,8 @@ let isCodeGenOnly = 1, ForceDisassemble
(ins VR256:$src),
"movupd\t{$src, $dst|$dst, $src}", []>,
VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">;
-}
+} // SchedRW
+} // Predicate
// Aliases to help the assembler pick two byte VEX encodings by swapping the
// operands relative to the normal instructions to use VEX.R instead of VEX.B.
@@ -554,7 +564,7 @@ def : InstAlias<"vmovups\t{$src, $dst|$d
def : InstAlias<"vmovupd\t{$src, $dst|$dst, $src}",
(VMOVUPDYrr_REV VR256L:$dst, VR256H:$src), 0>;
-let SchedRW = [WriteFStore] in {
+let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -571,7 +581,7 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (ou
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
- SchedRW = [WriteFMove] in {
+ SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", []>,
FoldGenData<"MOVAPSrr">;
@@ -2982,7 +2992,7 @@ defm : scalar_unary_math_patterns<int_x8
let AddedComplexity = 400 in { // Prefer non-temporal versions
let Predicates = [HasAVX, NoVLX] in {
-let SchedRW = [WriteFStore] in {
+let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -2993,7 +3003,9 @@ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem,
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2f64 VR128:$src),
addr:$dst)]>, VEX, VEX_WIG;
+} // SchedRW
+let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -3006,21 +3018,23 @@ def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem,
addr:$dst)]>, VEX, VEX_L, VEX_WIG;
} // SchedRW
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
+let ExeDomain = SSEPackedInt in {
def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src),
- addr:$dst)]>, VEX, VEX_WIG;
+ addr:$dst)]>, VEX, VEX_WIG,
+ Sched<[SchedWriteVecMoveLS.XMM.MR]>;
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4i64 VR256:$src),
- addr:$dst)]>, VEX, VEX_L, VEX_WIG;
-} // ExeDomain, SchedRW
+ addr:$dst)]>, VEX, VEX_L, VEX_WIG,
+ Sched<[SchedWriteVecMoveLS.YMM.MR]>;
+} // ExeDomain
} // Predicates
-let SchedRW = [WriteFStore] in {
+let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -3029,7 +3043,7 @@ def MOVNTPDmr : PDI<0x2B, MRMDestMem, (o
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in
+let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;
@@ -3142,73 +3156,82 @@ def STMXCSR : I<0xAE, MRM3m, (outs), (in
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-let hasSideEffects = 0, SchedRW = [WriteVecMove] in {
+let hasSideEffects = 0 in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
-def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L, VEX_WIG;
+ "movdqa\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
+ "movdqu\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
+def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L, VEX_WIG;
+ "movdqu\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
}
// For Disassembler
-let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
- SchedRW = [WriteVecMove] in {
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
+ "movdqa\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.XMM.RR]>,
+ VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">;
+ "movdqa\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.YMM.RR]>,
+ VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">;
def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
+ "movdqu\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.XMM.RR]>,
+ VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">;
+ "movdqu\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.YMM.RR]>,
+ VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">;
}
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- hasSideEffects = 0, SchedRW = [WriteVecLoad], Predicates = [HasAVX,NoVLX] in {
+ hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (alignedloadv2i64 addr:$src))]>,
- VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.YMM.RM]>,
VEX, VEX_L, VEX_WIG;
def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (loadv2i64 addr:$src))]>,
+ Sched<[SchedWriteVecMoveLS.XMM.RM]>,
XS, VEX, VEX_WIG;
def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.YMM.RM]>,
XS, VEX, VEX_L, VEX_WIG;
}
-let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteVecStore],
- Predicates = [HasAVX,NoVLX] in {
+let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[(alignedstore (v2i64 VR128:$src), addr:$dst)]>,
- VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_WIG;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, VEX_WIG;
def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",
[(store (v2i64 VR128:$src), addr:$dst)]>,
- XS, VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, VEX_WIG;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[]>,
- XS, VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, VEX_WIG;
}
-let SchedRW = [WriteVecMove] in {
+let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
let hasSideEffects = 0 in {
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>;
@@ -3231,7 +3254,7 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg,
} // SchedRW
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- hasSideEffects = 0, SchedRW = [WriteVecLoad] in {
+ hasSideEffects = 0, SchedRW = [SchedWriteVecMoveLS.XMM.RM] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
@@ -3241,7 +3264,8 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (out
XS, Requires<[UseSSE2]>;
}
-let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteVecStore] in {
+let mayStore = 1, hasSideEffects = 0,
+ SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
@@ -3882,8 +3906,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (o
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
-
+let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
@@ -4396,21 +4419,21 @@ let Predicates = [UseSSE3] in {
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//
-let SchedRW = [WriteVecLoad] in {
let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
- VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
- VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
} // Predicates
+
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
-} // SchedRW
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
+ Sched<[SchedWriteVecMoveLS.XMM.RM]>;
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
@@ -6357,18 +6380,18 @@ let Predicates = [UseSSE41] in {
}
let AddedComplexity = 400 in { // Prefer non-temporal versions
-let SchedRW = [WriteVecLoad] in {
+
let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_WIG;
+ "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movntdqa\t{$src, $dst|$dst, $src}", []>;
-} // SchedRW
+ "movntdqa\t{$src, $dst|$dst, $src}", []>,
+ Sched<[SchedWriteVecMoveLS.XMM.RM]>;
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v8f32 (alignednontemporalload addr:$src)),
@@ -7710,7 +7733,7 @@ let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
(ins i128mem:$dst, VR256:$src1, u8imm:$src2),
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- Sched<[WriteVecStore]>, VEX, VEX_L;
+ Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L;
let Predicates = [HasAVX2, NoVLX] in {
defm : vextract_lowering<"VEXTRACTI128", v4i64, v2i64>;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=332090&r1=332089&r2=332090&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Fri May 11 05:46:54 2018
@@ -68,6 +68,27 @@ class X86SchedWriteSizes<X86SchedWriteWi
X86SchedWriteWidths PD = sPD;
}
+// Class that wraps a move/load/store triple for a vector width.
+class X86SchedWriteMoveLS<SchedWrite MoveRR,
+ SchedWrite LoadRM,
+ SchedWrite StoreMR> {
+ SchedWrite RR = MoveRR;
+ SchedWrite RM = LoadRM;
+ SchedWrite MR = StoreMR;
+}
+
+// Class that wraps X86SchedWriteMoveLS for each vector width.
+class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
+ X86SchedWriteMoveLS s128,
+ X86SchedWriteMoveLS s256,
+ X86SchedWriteMoveLS s512> {
+ X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations.
+ X86SchedWriteMoveLS MMX = sScl; // MMX operations.
+ X86SchedWriteMoveLS XMM = s128; // XMM operations.
+ X86SchedWriteMoveLS YMM = s256; // YMM operations.
+ X86SchedWriteMoveLS ZMM = s512; // ZMM operations.
+}
+
// Loads, stores, and moves, not folded with other operations.
def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
@@ -75,7 +96,7 @@ def WriteMove : SchedWrite;
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
-def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
+def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
defm WriteIMul : X86SchedWritePair; // Integer multiplication.
defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
@@ -307,6 +328,27 @@ def WriteFence : SchedWrite;
// Nop, not very useful except it provides a model for nops!
def WriteNop : SchedWrite;
+// Move/Load/Store wrappers.
+def WriteFMoveLS
+ : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
+def WriteFMoveLSX
+ : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
+def WriteFMoveLSY
+ : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
+def SchedWriteFMoveLS
+ : X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
+ WriteFMoveLSY, WriteFMoveLSY>;
+
+def WriteVecMoveLS
+ : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
+def WriteVecMoveLSX
+ : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
+def WriteVecMoveLSY
+ : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
+def SchedWriteVecMoveLS
+ : X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
+ WriteVecMoveLSY, WriteVecMoveLSY>;
+
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddY>;
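
For reference, a minimal sketch of how a processor model could then cost such a width-specific write (the write WriteFLoadY and the resource name are assumptions for illustration, not part of this patch):

  // Hypothetical processor-model entry for a YMM-specific load write.
  def : WriteRes<WriteFLoadY, [HypotheticalLoadPort]> { let Latency = 7; }

Until then, every width of SchedWriteFMoveLS and SchedWriteVecMoveLS maps to the existing WriteFMove/WriteFLoad/WriteFStore and WriteVecMove/WriteVecLoad/WriteVecStore writes, so this commit is scheduling-neutral.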