[llvm] edf8855 - [X86] Replace unnecessary int2float and float2double overrides with better base class defs
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 5 12:07:09 PDT 2022
Author: Simon Pilgrim
Date: 2022-11-05T19:07:01Z
New Revision: edf885531e9e38fb127f6075373b706acef7b59c
URL: https://github.com/llvm/llvm-project/commit/edf885531e9e38fb127f6075373b706acef7b59c
DIFF: https://github.com/llvm/llvm-project/commit/edf885531e9e38fb127f6075373b706acef7b59c.diff
LOG: [X86] Replace unnecessary int2float and float2double overrides with better base class defs
The Broadwell/Haswell models were completely overriding the scheduler class definitions for these conversions with per-instruction InstRW entries. We can remove those overrides entirely by choosing better base class definitions (plus a fix for a missing MMX folded load).
Added:
Modified:
llvm/lib/Target/X86/X86SchedBroadwell.td
llvm/lib/Target/X86/X86SchedHaswell.td
llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 003a4e376adb..d4ffdea79c5c 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -367,17 +367,17 @@ defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : BWWriteResPair<WriteCvtI2SS, [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PS, [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PSY, [BWPort1], 4>;
+defm : BWWriteResPair<WriteCvtI2PS, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtI2PSY, [BWPort1], 3, [1], 1, 6>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : BWWriteResPair<WriteCvtI2SD, [BWPort1], 4>;
defm : BWWriteResPair<WriteCvtI2PD, [BWPort1,BWPort5], 4, [1,1], 2, 5>;
defm : BWWriteResPair<WriteCvtI2PDY, [BWPort1,BWPort5], 6, [1,1], 2, 5>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
-defm : BWWriteResPair<WriteCvtSS2SD, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2PD, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtSS2SD, [BWPort0,BWPort5], 2, [1,1], 2, 5>;
+defm : BWWriteResPair<WriteCvtPS2PD, [BWPort0,BWPort5], 2, [1,1], 2, 5>;
+defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort0,BWPort5], 4, [1,1], 2, 5>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1,BWPort5], 4, [1,1], 2, 5>;
defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1,BWPort5], 4, [1,1], 2, 5>;
@@ -715,14 +715,6 @@ def: InstRW<[BWWriteResGroup14], (instrs LFENCE,
WAIT,
XGETBV)>;
-def BWWriteResGroup15 : SchedWriteRes<[BWPort0,BWPort5]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup15], (instregex "(V?)CVTPS2PDrr",
- "(V?)CVTSS2SDrr")>;
-
def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -784,9 +776,7 @@ def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup27], (instrs MMX_CVTPI2PSrr)>;
-def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr",
- "(V?)CVTDQ2PS(Y?)rr")>;
+def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr")>;
def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
let Latency = 3;
@@ -858,13 +848,6 @@ def: InstRW<[BWWriteResGroup39], (instregex "(V?)CVT(T?)SD2SI64rr",
"(V?)CVT(T?)SS2SI64rr",
"(V?)CVT(T?)SS2SIrr")>;
-def BWWriteResGroup40 : SchedWriteRes<[BWPort0,BWPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup40], (instrs VCVTPS2PDYrr)>;
-
def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> {
let Latency = 4;
let NumMicroOps = 2;
@@ -1164,9 +1147,6 @@ def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup91], (instrs MMX_CVTPI2PSrm,
- CVTDQ2PSrm,
- VCVTDQ2PSrm)>;
def: InstRW<[BWWriteResGroup91], (instregex "P(DEP|EXT)(32|64)rm")>;
def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> {
@@ -1230,13 +1210,6 @@ def: InstRW<[BWWriteResGroup105], (instregex "(V?)CVTSS2SI(64)?rm",
"VCVTTSS2SI64rm",
"(V?)CVTTSS2SIrm")>;
-def BWWriteResGroup106 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup106], (instrs VCVTPS2PDYrm)>;
-
def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 6cff9c30ee16..13b0ed25361e 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -360,23 +360,23 @@ defm : HWWriteResPair<WriteCvtPD2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2IY, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2IZ, [HWPort1], 3>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtSS2I, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2I, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2IY, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2IZ, [HWPort1], 3>; // Unsupported = 1
+defm : HWWriteResPair<WriteCvtPS2I, [HWPort1], 3, [1], 1, 6>;
+defm : HWWriteResPair<WriteCvtPS2IY, [HWPort1], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteCvtPS2IZ, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtI2SD, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtI2PD, [HWPort1,HWPort5], 4, [1,1], 2, 6>;
defm : HWWriteResPair<WriteCvtI2PDY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
defm : HWWriteResPair<WriteCvtI2PDZ, [HWPort1,HWPort5], 6, [1,1], 2, 6>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtI2SS, [HWPort1], 4>;
-defm : HWWriteResPair<WriteCvtI2PS, [HWPort1], 4>;
-defm : HWWriteResPair<WriteCvtI2PSY, [HWPort1], 4>;
-defm : HWWriteResPair<WriteCvtI2PSZ, [HWPort1], 4>; // Unsupported = 1
-
-defm : HWWriteResPair<WriteCvtSS2SD, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2PD, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2PDZ, [HWPort1], 3>; // Unsupported = 1
+defm : HWWriteResPair<WriteCvtI2PS, [HWPort1], 3, [1], 1, 6>;
+defm : HWWriteResPair<WriteCvtI2PSY, [HWPort1], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteCvtI2PSZ, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
+
+defm : HWWriteResPair<WriteCvtSS2SD, [HWPort0,HWPort5], 2, [1,1], 2, 5>;
+defm : HWWriteResPair<WriteCvtPS2PD, [HWPort0,HWPort5], 2, [1,1], 2, 5>;
+defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort0,HWPort5], 4, [1,1], 2, 6>;
+defm : HWWriteResPair<WriteCvtPS2PDZ, [HWPort0,HWPort5], 4, [1,1], 2, 6>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtSD2SS, [HWPort1,HWPort5], 4, [1,1], 2, 5>;
defm : HWWriteResPair<WriteCvtPD2PS, [HWPort1,HWPort5], 4, [1,1], 2, 6>;
defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
@@ -1131,14 +1131,6 @@ def: InstRW<[HWWriteResGroup30], (instrs LFENCE,
WAIT,
XGETBV)>;
-def HWWriteResGroup31 : SchedWriteRes<[HWPort0,HWPort5]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup31], (instregex "(V?)CVTPS2PDrr",
- "(V?)CVTSS2SDrr")>;
-
def HWWriteResGroup32 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -1241,9 +1233,7 @@ def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup50], (instrs MMX_CVTPI2PSrr)>;
-def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr",
- "(V?)CVTDQ2PS(Y?)rr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr")>;
def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> {
let Latency = 3;
@@ -1267,8 +1257,7 @@ def HWWriteResGroup52_1 : SchedWriteRes<[HWPort1,HWPort23]> {
}
def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m")>;
-def: InstRW<[HWWriteResGroup52_1], (instrs VCVTDQ2PSYrm,
- VCVTPS2DQYrm,
+def: InstRW<[HWWriteResGroup52_1], (instrs VCVTPS2DQYrm,
VCVTTPS2DQYrm)>;
def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> {
@@ -1369,13 +1358,6 @@ def HWWriteResGroup70 : SchedWriteRes<[HWPort0,HWPort1]> {
def: InstRW<[HWWriteResGroup70], (instregex "(V?)CVT(T?)SD2SI(64)?rr",
"(V?)CVT(T?)SS2SI(64)?rr")>;
-def HWWriteResGroup71 : SchedWriteRes<[HWPort0,HWPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup71], (instrs VCVTPS2PDYrr)>;
-
def HWWriteResGroup72 : SchedWriteRes<[HWPort0,HWPort0156]> {
let Latency = 4;
let NumMicroOps = 2;
@@ -1414,13 +1396,6 @@ def: InstRW<[HWWriteResGroup76], (instregex "(V?)CVTSD2SI(64)?rm",
"VCVTTSS2SI64rm",
"(V?)CVTTSS2SIrm")>;
-def HWWriteResGroup77 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
- let Latency = 10;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup77], (instrs VCVTPS2PDYrm)>;
-
def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 10;
let NumMicroOps = 3;
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s
index 17203584ea3f..a79a47724f60 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s
@@ -211,7 +211,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvtps2pi %xmm0, %mm2
-# CHECK-NEXT: 2 8 1.00 * cvtps2pi (%rax), %mm2
+# CHECK-NEXT: 2 9 1.00 * cvtps2pi (%rax), %mm2
# CHECK-NEXT: 2 4 1.00 cvtsi2ss %ecx, %xmm2
# CHECK-NEXT: 3 5 2.00 cvtsi2ss %rcx, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
@@ -221,7 +221,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %ecx
# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %rcx
# CHECK-NEXT: 2 4 1.00 cvttps2pi %xmm0, %mm2
-# CHECK-NEXT: 2 8 1.00 * cvttps2pi (%rax), %mm2
+# CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2
# CHECK-NEXT: 2 4 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 2 4 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx
More information about the llvm-commits mailing list