[llvm] d225ff3 - [X86] Add SchedWriteVecExtend scheduler per-width wrapper
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 1 11:19:21 PDT 2022
Author: Simon Pilgrim
Date: 2022-11-01T18:19:06Z
New Revision: d225ff3be79d2181cb9d894849186336aae8d169
URL: https://github.com/llvm/llvm-project/commit/d225ff3be79d2181cb9d894849186336aae8d169
DIFF: https://github.com/llvm/llvm-project/commit/d225ff3be79d2181cb9d894849186336aae8d169.diff
LOG: [X86] Add SchedWriteVecExtend scheduler per-width wrapper
Replaces hard-coded uses of WriteShuffleX/WriteVPMOV256 for the VPMOVSX/VPMOVZX vector extension instructions.
Added:
Modified:
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/lib/Target/X86/X86Schedule.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index e4373bb16f100..2b823114a0357 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10168,19 +10168,19 @@ multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWr
multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
+ X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasBWI] in {
- defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v8i16x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
- defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i16x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasBWI] in {
- defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v32i16_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
v32i8x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
}
@@ -10188,19 +10188,19 @@ multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
+ X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
- defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
- defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
- defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
}
@@ -10208,19 +10208,19 @@ multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
+ X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
- defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
v16i8x_info, i16mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
- defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
- defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
}
@@ -10228,19 +10228,19 @@ multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
+ X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
- defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
- defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
- defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
v16i16x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
}
@@ -10248,19 +10248,19 @@ multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
+ X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
- defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
v8i16x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
- defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
- defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
}
@@ -10268,38 +10268,38 @@ multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
+ X86SchedWriteWidths sched,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
let Predicates = [HasVLX, HasAVX512] in {
- defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
v4i32x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
- defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
v4i32x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
- defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
v8i32x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
}
}
-defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
-defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
-defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
-defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
-defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
-defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
+defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
+defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
+defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
+defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
+defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
-defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
-defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
-defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
-defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
-defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
-defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
+defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
+defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
+defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
+defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
+defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
// Patterns that we also need any extend versions of. aext_vector_inreg
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index dde7c0f36fd1b..f90202e98a941 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5003,11 +5003,11 @@ multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
SchedWriteShuffle.XMM>;
let Predicates = [HasAVX, prd] in
defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
- VR128, VR128, SchedWriteShuffle.XMM>,
+ VR128, VR128, SchedWriteVecExtend.XMM>,
VEX, VEX_WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
- VR256, VR128, WriteVPMOV256>,
+ VR256, VR128, SchedWriteVecExtend.YMM>,
VEX, VEX_L, VEX_WIG;
}
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index 16c4c57bcb737..b156396660805 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -678,6 +678,9 @@ def SchedWritePSADBW
: X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
WritePSADBWY, WritePSADBWZ>;
+def SchedWriteVecExtend
+ : X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
+ WriteVPMOV256, WriteVPMOV256>;
def SchedWriteShuffle
: X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
WriteShuffleY, WriteShuffleZ>;
More information about the llvm-commits mailing list