[llvm] 810b8fd - [X86] Replace unnecessary CVTPD2PI/CVTPD2DQ overrides with better base class defs
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 9 09:11:57 PST 2022
Author: Simon Pilgrim
Date: 2022-11-09T17:08:45Z
New Revision: 810b8fdff92ae8c234041234fdc1a175c3eb1ff9
URL: https://github.com/llvm/llvm-project/commit/810b8fdff92ae8c234041234fdc1a175c3eb1ff9
DIFF: https://github.com/llvm/llvm-project/commit/810b8fdff92ae8c234041234fdc1a175c3eb1ff9.diff
LOG: [X86] Replace unnecessary CVTPD2PI/CVTPD2DQ overrides with better base class defs
Broadwell/Haswell were completely overriding the WriteCvtPD2I class defs - we can remove those overrides entirely by just choosing better class defs.
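Also fixes the scheduler model for a missing YMM folded-load case (vcvtpd2dqy/vcvttpd2dqy with a memory operand were modelled as 2 uops with latency 8 and are now 3 uops with latency 12) - confirmed with Agner Fog's instruction tables + uops.info that the port usage is correct.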
Added:
Modified:
llvm/lib/Target/X86/X86SchedBroadwell.td
llvm/lib/Target/X86/X86SchedHaswell.td
llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index c6bc7751cb1fc..7156c2ea9d592 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -362,8 +362,8 @@ defm : BWWriteResPair<WriteCvtPS2I, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPS2IY, [BWPort1], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : BWWriteResPair<WriteCvtSD2I, [BWPort1,BWPort0], 4, [1,1], 2, 5>;
-defm : BWWriteResPair<WriteCvtPD2I, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2I, [BWPort1,BWPort5], 4, [1,1], 2, 5>;
+defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1,BWPort5], 6, [1,1], 2, 6>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : BWWriteResPair<WriteCvtI2SS, [BWPort1], 4>;
@@ -851,12 +851,10 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIrr",
- "MMX_CVT(T?)PS2PIrr",
+def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PS2PIrr",
"(V?)CVTSI642SDrr",
"(V?)CVTSI2SDrr",
- "(V?)CVTSI2SSrr",
- "(V?)CVT(T?)PD2DQrr")>;
+ "(V?)CVTSI2SSrr")>;
def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> {
let Latency = 4;
@@ -968,14 +966,6 @@ def: InstRW<[BWWriteResGroup59], (instrs CVTPS2PDrm, VCVTPS2PDrm,
VPSLLVQrm,
VPSRLVQrm)>;
-def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup60], (instrs VCVTPD2DQYrr,
- VCVTTPD2DQYrr)>;
-
def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
@@ -1188,15 +1178,6 @@ def: InstRW<[BWWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
def: InstRW<[BWWriteResGroup101], (instrs VCVTPS2DQYrm,
VCVTTPS2DQYrm)>;
-def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup107], (instrs CVTPD2DQrm, VCVTPD2DQrm,
- CVTTPD2DQrm, VCVTTPD2DQrm)>;
-def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIrm")>;
-
def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> {
let Latency = 9;
let NumMicroOps = 3;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 0cd007cdce907..7c5804fc198f2 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -356,9 +356,9 @@ defm : HWWriteResPair<WriteFVarBlendZ, [HWPort5], 2, [2], 2, 7>; // Unsupported
// Conversion between integer and float.
defm : HWWriteResPair<WriteCvtSD2I, [HWPort1,HWPort0], 4, [1,1], 2, 5>;
-defm : HWWriteResPair<WriteCvtPD2I, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPD2IY, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPD2IZ, [HWPort1], 3>; // Unsupported = 1
+defm : HWWriteResPair<WriteCvtPD2I, [HWPort1,HWPort5], 4, [1,1], 2, 6>;
+defm : HWWriteResPair<WriteCvtPD2IY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
+defm : HWWriteResPair<WriteCvtPD2IZ, [HWPort1,HWPort5], 6, [1,1], 2, 6>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtSS2I, [HWPort1,HWPort0], 4, [1,1], 2, 5>;
defm : HWWriteResPair<WriteCvtPS2I, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteCvtPS2IY, [HWPort1], 3, [1], 1, 7>;
@@ -1354,13 +1354,10 @@ def HWWriteResGroup73 : SchedWriteRes<[HWPort1,HWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPD2PIrr,
- MMX_CVTPS2PIrr,
- MMX_CVTTPD2PIrr,
+def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPS2PIrr,
MMX_CVTTPS2PIrr)>;
def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTSI(64)?2SDrr",
- "(V?)CVTSI2SSrr",
- "(V?)CVT(T?)PD2DQrr")>;
+ "(V?)CVTSI2SSrr")>;
def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 11;
@@ -1369,16 +1366,6 @@ def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
}
def: InstRW<[HWWriteResGroup75], (instregex "FICOM(P?)(16|32)m")>;
-def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
- let Latency = 10;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup78], (instrs CVTPD2DQrm, VCVTPD2DQrm,
- CVTTPD2DQrm, VCVTTPD2DQrm,
- MMX_CVTPD2PIrm,
- MMX_CVTTPD2PIrm)>;
-
def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
@@ -1479,14 +1466,6 @@ def HWWriteResGroup100 : SchedWriteRes<[HWPort06,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup100], (instrs XSETBV)>;
-def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup102], (instrs VCVTPD2DQYrr,
- VCVTTPD2DQYrr)>;
-
def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 13;
let NumMicroOps = 3;
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
index 27c6120d84987..c33cc79bd6c18 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
@@ -1125,7 +1125,7 @@ vzeroupper
# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 3 12 1.00 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtpd2ps %ymm0, %xmm2
@@ -1161,7 +1161,7 @@ vzeroupper
# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 3 12 1.00 * vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %ymm0, %ymm2
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 430.25 2.25 12.67
+# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 432.25 2.25 12.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1835,7 +1835,7 @@ vzeroupper
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2
@@ -1871,7 +1871,7 @@ vzeroupper
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %ymm0, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
index ea7d251ffccef..3da547de54e36 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
@@ -1125,7 +1125,7 @@ vzeroupper
# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 3 12 1.00 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtpd2ps %ymm0, %xmm2
@@ -1161,7 +1161,7 @@ vzeroupper
# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 3 12 1.00 * vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %ymm0, %ymm2
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 433.58 2.25 12.67
+# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 435.58 2.25 12.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1835,7 +1835,7 @@ vzeroupper
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2
@@ -1871,7 +1871,7 @@ vzeroupper
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %ymm0, %ymm2
More information about the llvm-commits mailing list