[llvm] 313a4ae - [X86] Fix scheduler tag for GFNI YMM instructions
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 13 06:13:01 PST 2022
Author: Simon Pilgrim
Date: 2022-11-13T14:10:09Z
New Revision: 313a4aef7f501f3d21c9843ed67248df6ea30d29
URL: https://github.com/llvm/llvm-project/commit/313a4aef7f501f3d21c9843ed67248df6ea30d29
DIFF: https://github.com/llvm/llvm-project/commit/313a4aef7f501f3d21c9843ed67248df6ea30d29.diff
LOG: [X86] Fix scheduler tag for GFNI YMM instructions
The YMM variants of the GFNI instructions were hardcoded to use the XMM-width scheduler classes.
Added:
Modified:
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/tools/llvm-mca/X86/Generic/resources-avx512gfnivl.s
llvm/test/tools/llvm-mca/X86/Generic/resources-avxgfni.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512gfnivl.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avxgfni.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index e648c30c30fb..6f173b037c0f 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8055,7 +8055,8 @@ let Predicates = [HasAVX2] in {
multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
RegisterClass RC, PatFrag MemOpFrag,
- X86MemOperand X86MemOp, bit Is2Addr = 0> {
+ X86MemOperand X86MemOp, X86FoldableSchedWrite sched,
+ bit Is2Addr = 0> {
let ExeDomain = SSEPackedInt,
AsmString = !if(Is2Addr,
OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
@@ -8063,31 +8064,32 @@ multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
let isCommutable = 1 in
def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
[(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
- Sched<[SchedWriteVecALU.XMM]>, T8PD;
+ Sched<[sched]>, T8PD;
def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
[(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
(MemOpFrag addr:$src2))))]>,
- Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, T8PD;
}
}
multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
- X86MemOperand X86MemOp, bit Is2Addr = 0> {
+ X86MemOperand X86MemOp, X86FoldableSchedWrite sched,
+ bit Is2Addr = 0> {
let AsmString = !if(Is2Addr,
OpStr#"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
OpStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), "",
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
- SSEPackedInt>, Sched<[SchedWriteVecIMul.XMM]>;
+ SSEPackedInt>, Sched<[sched]>;
def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(MemOpFrag addr:$src2),
timm:$src3)))], SSEPackedInt>,
- Sched<[SchedWriteVecIMul.XMM.Folded, SchedWriteVecIMul.XMM.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8095,12 +8097,14 @@ multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
let Constraints = "$src1 = $dst",
Predicates = [HasGFNI, UseSSE2] in
defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
- VR128, load, i128mem, 1>;
+ VR128, load, i128mem, SchedWriteVecIMul.XMM, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX] in {
defm V#NAME : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
- load, i128mem>, VEX_4V, VEX_W;
+ load, i128mem, SchedWriteVecIMul.XMM>,
+ VEX_4V, VEX_W;
defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
- load, i256mem>, VEX_4V, VEX_L, VEX_W;
+ load, i256mem, SchedWriteVecIMul.YMM>,
+ VEX_4V, VEX_L, VEX_W;
}
}
@@ -8108,12 +8112,12 @@ multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
let Constraints = "$src1 = $dst",
Predicates = [HasGFNI, UseSSE2] in
defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
- i128mem, 1>;
+ i128mem, SchedWriteVecALU.XMM, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX] in {
defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
- i128mem>, VEX_4V;
+ i128mem, SchedWriteVecALU.XMM>, VEX_4V;
defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
- i256mem>, VEX_4V, VEX_L;
+ i256mem, SchedWriteVecALU.YMM>, VEX_4V, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
let isCommutable = 0 in {
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512gfnivl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512gfnivl.s
index 9d3664d8a0db..2d5c8db16746 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512gfnivl.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512gfnivl.s
@@ -74,7 +74,7 @@ vgf2p8mulb (%rax), %ymm1, %ymm2 {z}{k1}
# CHECK-NEXT: 2 11 1.00 * vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 2 11 1.00 * vgf2p8affineinvqb $0, (%rax){1to2}, %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 1 5 1.00 vgf2p8affineinvqb $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 11 1.00 * vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 1.00 * vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 12 1.00 * vgf2p8affineinvqb $0, (%rax){1to4}, %ymm1, %ymm2
# CHECK-NEXT: 1 5 1.00 vgf2p8affineinvqb $0, %ymm0, %ymm1, %ymm2 {%k1}
# CHECK-NEXT: 2 12 1.00 * vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2 {%k1}
@@ -92,10 +92,10 @@ vgf2p8mulb (%rax), %ymm1, %ymm2 {z}{k1}
# CHECK-NEXT: 2 11 1.00 * vgf2p8affineqb $0, (%rax), %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 2 11 1.00 * vgf2p8affineqb $0, (%rax){1to2}, %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 1 5 1.00 vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 11 1.00 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 1.00 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 12 1.00 * vgf2p8affineqb $0, (%rax){1to4}, %ymm1, %ymm2
# CHECK-NEXT: 1 5 1.00 vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 11 1.00 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 1.00 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 12 1.00 * vgf2p8affineqb $0, (%rax){1to4}, %ymm1, %ymm2
# CHECK-NEXT: 1 5 1.00 vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2 {%k1} {z}
# CHECK-NEXT: 2 12 1.00 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2 {%k1} {z}
@@ -107,7 +107,7 @@ vgf2p8mulb (%rax), %ymm1, %ymm2 {z}{k1}
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %xmm0, %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 2 7 0.50 * vgf2p8mulb (%rax), %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 7 0.50 * vgf2p8mulb (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vgf2p8mulb (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %ymm0, %ymm1, %ymm2 {%k1}
# CHECK-NEXT: 2 8 0.50 * vgf2p8mulb (%rax), %ymm1, %ymm2 {%k1}
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %ymm0, %ymm1, %ymm2 {%k1} {z}
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avxgfni.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avxgfni.s
index 729e0921679e..364a4e6c33ea 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avxgfni.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avxgfni.s
@@ -31,15 +31,15 @@ vgf2p8mulb (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 1.00 vgf2p8affineinvqb $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 11 1.00 * vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 1.00 vgf2p8affineinvqb $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 11 1.00 * vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 1.00 * vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 1.00 vgf2p8affineqb $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 11 1.00 * vgf2p8affineqb $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 1.00 vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 11 1.00 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 1.00 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.50 * vgf2p8mulb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 7 0.50 * vgf2p8mulb (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vgf2p8mulb (%rax), %ymm1, %ymm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512gfnivl.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512gfnivl.s
index c284985ae3b1..0d97488d88d3 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512gfnivl.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512gfnivl.s
@@ -74,7 +74,7 @@ vgf2p8mulb (%rax), %ymm1, %ymm2 {z}{k1}
# CHECK-NEXT: 2 11 0.50 * vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 2 11 0.50 * vgf2p8affineinvqb $0, (%rax){1to2}, %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 1 5 0.50 vgf2p8affineinvqb $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 11 0.50 * vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 0.50 * vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 12 0.50 * vgf2p8affineinvqb $0, (%rax){1to4}, %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vgf2p8affineinvqb $0, %ymm0, %ymm1, %ymm2 {%k1}
# CHECK-NEXT: 2 12 0.50 * vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2 {%k1}
@@ -92,10 +92,10 @@ vgf2p8mulb (%rax), %ymm1, %ymm2 {z}{k1}
# CHECK-NEXT: 2 11 0.50 * vgf2p8affineqb $0, (%rax), %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 2 11 0.50 * vgf2p8affineqb $0, (%rax){1to2}, %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 1 5 0.50 vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 11 0.50 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 0.50 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 12 0.50 * vgf2p8affineqb $0, (%rax){1to4}, %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 11 0.50 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 0.50 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 12 0.50 * vgf2p8affineqb $0, (%rax){1to4}, %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2 {%k1} {z}
# CHECK-NEXT: 2 12 0.50 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2 {%k1} {z}
@@ -107,7 +107,7 @@ vgf2p8mulb (%rax), %ymm1, %ymm2 {z}{k1}
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %xmm0, %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 2 7 0.50 * vgf2p8mulb (%rax), %xmm1, %xmm2 {%k1} {z}
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 7 0.50 * vgf2p8mulb (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vgf2p8mulb (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %ymm0, %ymm1, %ymm2 {%k1}
# CHECK-NEXT: 2 8 0.50 * vgf2p8mulb (%rax), %ymm1, %ymm2 {%k1}
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %ymm0, %ymm1, %ymm2 {%k1} {z}
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avxgfni.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avxgfni.s
index 10176e18e5d0..5e1abf4fd8ab 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avxgfni.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avxgfni.s
@@ -31,15 +31,15 @@ vgf2p8mulb (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vgf2p8affineinvqb $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 11 0.50 * vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vgf2p8affineinvqb $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 11 0.50 * vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 0.50 * vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vgf2p8affineqb $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 11 0.50 * vgf2p8affineqb $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 11 0.50 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 0.50 * vgf2p8affineqb $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.50 * vgf2p8mulb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vgf2p8mulb %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 7 0.50 * vgf2p8mulb (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vgf2p8mulb (%rax), %ymm1, %ymm2
# CHECK: Resources:
# CHECK-NEXT: [0] - ICXDivider
More information about the llvm-commits mailing list