[llvm] 83de8c2 - [X86] Fix SkylakeClient ports for int-to-double conversions
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 08:32:51 PDT 2024
Author: Simon Pilgrim
Date: 2024-08-27T16:32:29+01:00
New Revision: 83de8c2369d344b775aac4eeaac0746fcdce6849
URL: https://github.com/llvm/llvm-project/commit/83de8c2369d344b775aac4eeaac0746fcdce6849
DIFF: https://github.com/llvm/llvm-project/commit/83de8c2369d344b775aac4eeaac0746fcdce6849.diff
LOG: [X86] Fix SkylakeClient ports for int-to-double conversions
These are performed on SKLPort01 (plus SKLPort5 for the shuffle in the rr forms, or SKLPort23 for the load in the rm forms).
Also, clean up some MMX CVT overrides that match the SSE equivalents.
Matches uops.info and Agner Fog's instruction tables.
Added:
Modified:
llvm/lib/Target/X86/X86SchedSkylakeClient.td
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 996124c772dff4..5506ebe0cb6342 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -460,8 +460,10 @@ defm : SKLWriteResPair<WriteCvtI2PSY, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : X86WriteRes<WriteCvtI2SD, [SKLPort5,SKLPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2SDLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
-defm : SKLWriteResPair<WriteCvtI2PD, [SKLPort0,SKLPort5], 5, [1,1], 2, 6>;
-defm : SKLWriteResPair<WriteCvtI2PDY, [SKLPort0,SKLPort5], 7, [1,1], 2, 6>;
+defm : X86WriteRes<WriteCvtI2PD, [SKLPort5,SKLPort01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDYLd, [SKLPort23,SKLPort01], 11, [1,1], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : X86WriteRes<WriteCvtSS2SD, [SKLPort5,SKLPort01], 5, [1,1], 2>;
@@ -925,7 +927,7 @@ def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
-def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort01]> {
+def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort0]> {
let Latency = 5;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
@@ -965,7 +967,7 @@ def: InstRW<[SKLWriteResGroup67], (instregex "(V?)MOVSHDUPrm",
"(V?)MOVSLDUPrm",
"(V?)MOVDDUPrm")>;
-def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> {
+def SKLWriteResGroup68 : SchedWriteRes<[SKLPort01]> {
let Latency = 6;
let NumMicroOps = 2;
let ReleaseAtCycles = [2];
@@ -1239,13 +1241,6 @@ def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
}
def: SchedAlias<WriteADCRMW, SKLWriteResGroup119>;
-def SKLWriteResGroup120 : SchedWriteRes<[SKLPort0,SKLPort23]> {
- let Latency = 9;
- let NumMicroOps = 2;
- let ReleaseAtCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup120], (instrs MMX_CVTPI2PSrm)>;
-
def SKLWriteResGroup121 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 2;
@@ -1258,7 +1253,7 @@ def: InstRW<[SKLWriteResGroup121], (instrs PCMPGTQrm,
VPMOVSXWDYrm,
VPMOVZXWDYrm)>;
-def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
+def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort0]> {
let Latency = 9;
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
@@ -1290,13 +1285,6 @@ def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m")>;
def: InstRW<[SKLWriteResGroup133], (instrs VPCMPGTQYrm)>;
-def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
- let Latency = 10;
- let NumMicroOps = 3;
- let ReleaseAtCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup138], (instrs MMX_CVTPI2PDrm)>;
-
def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let Latency = 10;
let NumMicroOps = 4;
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
index ae3144227e98df..e05911e4709dc1 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
@@ -1115,9 +1115,9 @@ vzeroupper
# CHECK-NEXT: 1 2 1.00 vcomiss %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * vcomiss (%rax), %xmm1
# CHECK-NEXT: 2 5 1.00 vcvtdq2pd %xmm0, %xmm2
-# CHECK-NEXT: 3 11 1.00 * vcvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: 2 10 0.50 * vcvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 vcvtdq2pd %xmm0, %ymm2
-# CHECK-NEXT: 3 13 1.00 * vcvtdq2pd (%rax), %ymm2
+# CHECK-NEXT: 2 11 0.50 * vcvtdq2pd (%rax), %ymm2
# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %ymm0, %ymm2
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 126.00 340.58 198.58 173.83 173.83 38.00 326.58 6.25 11.33
+# CHECK-NEXT: - 126.00 338.58 200.58 173.83 173.83 38.00 324.58 6.25 11.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1824,10 +1824,10 @@ vzeroupper
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomisd (%rax), %xmm1
# CHECK-NEXT: - - 1.00 - - - - - - - vcomiss %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomiss (%rax), %xmm1
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2
-# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2
-# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2pd (%rax), %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
index b066ce3ca1926d..142006c3c3d384 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
@@ -208,10 +208,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 2 9 0.50 * cmpeqss (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 comiss %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * comiss (%rax), %xmm1
-# CHECK-NEXT: 2 6 2.00 cvtpi2ps %mm0, %xmm2
-# CHECK-NEXT: 2 9 1.00 * cvtpi2ps (%rax), %xmm2
+# CHECK-NEXT: 2 6 1.00 cvtpi2ps %mm0, %xmm2
+# CHECK-NEXT: 2 10 0.50 * cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtps2pi %xmm0, %mm2
-# CHECK-NEXT: 2 9 0.50 * cvtps2pi (%rax), %mm2
+# CHECK-NEXT: 2 9 1.00 * cvtps2pi (%rax), %mm2
# CHECK-NEXT: 2 5 1.00 cvtsi2ss %ecx, %xmm2
# CHECK-NEXT: 3 6 2.00 cvtsi2ss %rcx, %xmm2
# CHECK-NEXT: 2 10 0.50 * cvtsi2ssl (%rax), %xmm2
@@ -221,7 +221,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %ecx
# CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %rcx
# CHECK-NEXT: 2 5 1.00 cvttps2pi %xmm0, %mm2
-# CHECK-NEXT: 2 9 0.50 * cvttps2pi (%rax), %mm2
+# CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2
# CHECK-NEXT: 2 6 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %ecx
@@ -333,7 +333,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 24.00 72.83 23.83 32.00 32.00 8.00 31.83 0.50 3.00
+# CHECK-NEXT: - 24.00 73.33 23.33 32.00 32.00 8.00 31.83 0.50 3.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -351,10 +351,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpeqss (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - comiss %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comiss (%rax), %xmm1
-# CHECK-NEXT: - - 2.00 - - - - - - - cvtpi2ps %mm0, %xmm2
-# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2
-# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pi %xmm0, %mm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2pi (%rax), %mm2
+# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtpi2ps %mm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtps2pi %xmm0, %mm2
+# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtps2pi (%rax), %mm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2ss %ecx, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - cvtsi2ss %rcx, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2
@@ -363,8 +363,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %ecx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %rcx
-# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttps2pi %xmm0, %mm2
-# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2pi (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvttps2pi %xmm0, %mm2
+# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvttps2pi (%rax), %mm2
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvttss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvttss2si (%rax), %ecx
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
index 188a07ea71de17..b527c1cda3a8fc 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
@@ -423,7 +423,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 comisd %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * comisd (%rax), %xmm1
# CHECK-NEXT: 2 5 1.00 cvtdq2pd %xmm0, %xmm2
-# CHECK-NEXT: 3 11 1.00 * cvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: 2 10 0.50 * cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtpd2dq %xmm0, %xmm2
@@ -433,7 +433,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 3 11 1.00 * cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtpi2pd %mm0, %xmm2
-# CHECK-NEXT: 3 10 1.00 * cvtpi2pd (%rax), %xmm2
+# CHECK-NEXT: 2 10 0.50 * cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cvtps2dq (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtps2pd %xmm0, %xmm2
@@ -689,7 +689,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 40.00 113.58 79.58 63.50 63.50 14.00 93.58 2.25 5.00
+# CHECK-NEXT: - 40.00 111.58 81.58 63.50 63.50 14.00 91.58 2.25 5.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -708,8 +708,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpeqsd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - comisd %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comisd (%rax), %xmm1
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtdq2pd %xmm0, %xmm2
-# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2
@@ -718,8 +718,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2pi (%rax), %mm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtpi2pd %mm0, %xmm2
-# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpi2pd %mm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pd %xmm0, %xmm2
More information about the llvm-commits
mailing list