[llvm] 83de8c2 - [X86] Fix SkylakeClient ports for int-to-double conversions

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 08:32:51 PDT 2024


Author: Simon Pilgrim
Date: 2024-08-27T16:32:29+01:00
New Revision: 83de8c2369d344b775aac4eeaac0746fcdce6849

URL: https://github.com/llvm/llvm-project/commit/83de8c2369d344b775aac4eeaac0746fcdce6849
DIFF: https://github.com/llvm/llvm-project/commit/83de8c2369d344b775aac4eeaac0746fcdce6849.diff

LOG: [X86] Fix SkylakeClient ports for int-to-double conversions

These conversions are performed on SKLPort01, plus SKLPort5 for the shuffle in the register (rr) forms or SKLPort23 for the load in the memory (rm) forms.

Also, clean up some MMX CVT overrides that match the SSE equivalents.

Matches uops.info + Agner

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
    llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 996124c772dff4..5506ebe0cb6342 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -460,8 +460,10 @@ defm : SKLWriteResPair<WriteCvtI2PSY,  [SKLPort01], 4, [1], 1, 7>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
 defm : X86WriteRes<WriteCvtI2SD,      [SKLPort5,SKLPort01],  5, [1,1], 2>;
 defm : X86WriteRes<WriteCvtI2SDLd,   [SKLPort23,SKLPort01], 10, [1,1], 2>;
-defm : SKLWriteResPair<WriteCvtI2PD,   [SKLPort0,SKLPort5],  5, [1,1], 2, 6>;
-defm : SKLWriteResPair<WriteCvtI2PDY,  [SKLPort0,SKLPort5],  7, [1,1], 2, 6>;
+defm : X86WriteRes<WriteCvtI2PD,      [SKLPort5,SKLPort01],  5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDLd,   [SKLPort23,SKLPort01], 10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDY,     [SKLPort5,SKLPort01],  7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDYLd,  [SKLPort23,SKLPort01], 11, [1,1], 2>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
 
 defm : X86WriteRes<WriteCvtSS2SD,     [SKLPort5,SKLPort01],  5, [1,1], 2>;
@@ -925,7 +927,7 @@ def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> {
 }
 def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
 
-def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort01]> {
+def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort0]> {
   let Latency = 5;
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1,1];
@@ -965,7 +967,7 @@ def: InstRW<[SKLWriteResGroup67], (instregex "(V?)MOVSHDUPrm",
                                              "(V?)MOVSLDUPrm",
                                              "(V?)MOVDDUPrm")>;
 
-def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> {
+def SKLWriteResGroup68 : SchedWriteRes<[SKLPort01]> {
   let Latency = 6;
   let NumMicroOps = 2;
   let ReleaseAtCycles = [2];
@@ -1239,13 +1241,6 @@ def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
 }
 def: SchedAlias<WriteADCRMW, SKLWriteResGroup119>;
 
-def SKLWriteResGroup120 : SchedWriteRes<[SKLPort0,SKLPort23]> {
-  let Latency = 9;
-  let NumMicroOps = 2;
-  let ReleaseAtCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup120], (instrs MMX_CVTPI2PSrm)>;
-
 def SKLWriteResGroup121 : SchedWriteRes<[SKLPort5,SKLPort23]> {
   let Latency = 9;
   let NumMicroOps = 2;
@@ -1258,7 +1253,7 @@ def: InstRW<[SKLWriteResGroup121], (instrs PCMPGTQrm,
                                            VPMOVSXWDYrm,
                                            VPMOVZXWDYrm)>;
 
-def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
+def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort0]> {
   let Latency = 9;
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1,1];
@@ -1290,13 +1285,6 @@ def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
                                               "ILD_F(16|32|64)m")>;
 def: InstRW<[SKLWriteResGroup133], (instrs VPCMPGTQYrm)>;
 
-def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
-  let Latency = 10;
-  let NumMicroOps = 3;
-  let ReleaseAtCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup138], (instrs MMX_CVTPI2PDrm)>;
-
 def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
   let Latency = 10;
   let NumMicroOps = 4;

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
index ae3144227e98df..e05911e4709dc1 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
@@ -1115,9 +1115,9 @@ vzeroupper
 # CHECK-NEXT:  1      2     1.00                        vcomiss	%xmm0, %xmm1
 # CHECK-NEXT:  2      7     1.00    *                   vcomiss	(%rax), %xmm1
 # CHECK-NEXT:  2      5     1.00                        vcvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  3      11    1.00    *                   vcvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   vcvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  2      7     1.00                        vcvtdq2pd	%xmm0, %ymm2
-# CHECK-NEXT:  3      13    1.00    *                   vcvtdq2pd	(%rax), %ymm2
+# CHECK-NEXT:  2      11    0.50    *                   vcvtdq2pd	(%rax), %ymm2
 # CHECK-NEXT:  1      4     0.50                        vcvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  2      10    0.50    *                   vcvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  1      4     0.50                        vcvtdq2ps	%ymm0, %ymm2
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     126.00 340.58 198.58 173.83 173.83 38.00  326.58 6.25   11.33
+# CHECK-NEXT:  -     126.00 338.58 200.58 173.83 173.83 38.00  324.58 6.25   11.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1824,10 +1824,10 @@ vzeroupper
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     vcomisd	(%rax), %xmm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vcomiss	%xmm0, %xmm1
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     vcomiss	(%rax), %xmm1
-# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vcvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     vcvtdq2pd	(%rax), %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vcvtdq2pd	%xmm0, %ymm2
-# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     vcvtdq2pd	(%rax), %ymm2
+# CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     vcvtdq2pd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vcvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     vcvtdq2pd	%xmm0, %ymm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vcvtdq2pd	(%rax), %ymm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     vcvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vcvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     vcvtdq2ps	%ymm0, %ymm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
index b066ce3ca1926d..142006c3c3d384 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
@@ -208,10 +208,10 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  2      9     0.50    *                   cmpeqss	(%rax), %xmm2
 # CHECK-NEXT:  1      2     1.00                        comiss	%xmm0, %xmm1
 # CHECK-NEXT:  2      7     1.00    *                   comiss	(%rax), %xmm1
-# CHECK-NEXT:  2      6     2.00                        cvtpi2ps	%mm0, %xmm2
-# CHECK-NEXT:  2      9     1.00    *                   cvtpi2ps	(%rax), %xmm2
+# CHECK-NEXT:  2      6     1.00                        cvtpi2ps	%mm0, %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   cvtpi2ps	(%rax), %xmm2
 # CHECK-NEXT:  2      5     1.00                        cvtps2pi	%xmm0, %mm2
-# CHECK-NEXT:  2      9     0.50    *                   cvtps2pi	(%rax), %mm2
+# CHECK-NEXT:  2      9     1.00    *                   cvtps2pi	(%rax), %mm2
 # CHECK-NEXT:  2      5     1.00                        cvtsi2ss	%ecx, %xmm2
 # CHECK-NEXT:  3      6     2.00                        cvtsi2ss	%rcx, %xmm2
 # CHECK-NEXT:  2      10    0.50    *                   cvtsi2ssl	(%rax), %xmm2
@@ -221,7 +221,7 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  3      11    1.00    *                   cvtss2si	(%rax), %ecx
 # CHECK-NEXT:  3      11    1.00    *                   cvtss2si	(%rax), %rcx
 # CHECK-NEXT:  2      5     1.00                        cvttps2pi	%xmm0, %mm2
-# CHECK-NEXT:  2      9     0.50    *                   cvttps2pi	(%rax), %mm2
+# CHECK-NEXT:  2      9     1.00    *                   cvttps2pi	(%rax), %mm2
 # CHECK-NEXT:  2      6     1.00                        cvttss2si	%xmm0, %ecx
 # CHECK-NEXT:  3      7     1.00                        cvttss2si	%xmm0, %rcx
 # CHECK-NEXT:  3      11    1.00    *                   cvttss2si	(%rax), %ecx
@@ -333,7 +333,7 @@ xorps       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     24.00  72.83  23.83  32.00  32.00  8.00   31.83  0.50   3.00
+# CHECK-NEXT:  -     24.00  73.33  23.33  32.00  32.00  8.00   31.83  0.50   3.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -351,10 +351,10 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cmpeqss	(%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     comiss	%xmm0, %xmm1
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     comiss	(%rax), %xmm1
-# CHECK-NEXT:  -      -     2.00    -      -      -      -      -      -      -     cvtpi2ps	%mm0, %xmm2
-# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     cvtpi2ps	(%rax), %xmm2
-# CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvtps2pi	%xmm0, %mm2
-# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtps2pi	(%rax), %mm2
+# CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     cvtpi2ps	%mm0, %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtpi2ps	(%rax), %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     cvtps2pi	%xmm0, %mm2
+# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     cvtps2pi	(%rax), %mm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvtsi2ss	%ecx, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     2.00    -      -     cvtsi2ss	%rcx, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtsi2ssl	(%rax), %xmm2
@@ -363,8 +363,8 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  -      -     1.50   0.50    -      -      -     1.00    -      -     cvtss2si	%xmm0, %rcx
 # CHECK-NEXT:  -      -     1.50   0.50   0.50   0.50    -      -      -      -     cvtss2si	(%rax), %ecx
 # CHECK-NEXT:  -      -     1.50   0.50   0.50   0.50    -      -      -      -     cvtss2si	(%rax), %rcx
-# CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvttps2pi	%xmm0, %mm2
-# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvttps2pi	(%rax), %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     cvttps2pi	%xmm0, %mm2
+# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     cvttps2pi	(%rax), %mm2
 # CHECK-NEXT:  -      -     1.50   0.50    -      -      -      -      -      -     cvttss2si	%xmm0, %ecx
 # CHECK-NEXT:  -      -     1.50   0.50    -      -      -     1.00    -      -     cvttss2si	%xmm0, %rcx
 # CHECK-NEXT:  -      -     1.50   0.50   0.50   0.50    -      -      -      -     cvttss2si	(%rax), %ecx

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
index 188a07ea71de17..b527c1cda3a8fc 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
@@ -423,7 +423,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      2     1.00                        comisd	%xmm0, %xmm1
 # CHECK-NEXT:  2      7     1.00    *                   comisd	(%rax), %xmm1
 # CHECK-NEXT:  2      5     1.00                        cvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  3      11    1.00    *                   cvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   cvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  1      4     0.50                        cvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  2      10    0.50    *                   cvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  2      5     1.00                        cvtpd2dq	%xmm0, %xmm2
@@ -433,7 +433,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  2      5     1.00                        cvtpd2ps	%xmm0, %xmm2
 # CHECK-NEXT:  3      11    1.00    *                   cvtpd2ps	(%rax), %xmm2
 # CHECK-NEXT:  2      5     1.00                        cvtpi2pd	%mm0, %xmm2
-# CHECK-NEXT:  3      10    1.00    *                   cvtpi2pd	(%rax), %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   cvtpi2pd	(%rax), %xmm2
 # CHECK-NEXT:  1      4     0.50                        cvtps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  2      10    0.50    *                   cvtps2dq	(%rax), %xmm2
 # CHECK-NEXT:  2      5     1.00                        cvtps2pd	%xmm0, %xmm2
@@ -689,7 +689,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     40.00  113.58 79.58  63.50  63.50  14.00  93.58  2.25   5.00
+# CHECK-NEXT:  -     40.00  111.58 81.58  63.50  63.50  14.00  91.58  2.25   5.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -708,8 +708,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cmpeqsd	(%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     comisd	%xmm0, %xmm1
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     comisd	(%rax), %xmm1
-# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     cvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     cvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvtdq2pd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     cvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvtpd2dq	%xmm0, %xmm2
@@ -718,8 +718,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -     1.00    -      -     cvtpd2pi	(%rax), %mm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvtpd2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -     1.00    -      -     cvtpd2ps	(%rax), %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     cvtpi2pd	%mm0, %xmm2
-# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     cvtpi2pd	(%rax), %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvtpi2pd	%mm0, %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtpi2pd	(%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     cvtps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtps2dq	(%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvtps2pd	%xmm0, %xmm2


        


More information about the llvm-commits mailing list