[llvm] 810b8fd - [X86] Replace unnecessary CVTPD2PI/CVTPD2DQ overrides with better base class defs

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 9 09:11:57 PST 2022


Author: Simon Pilgrim
Date: 2022-11-09T17:08:45Z
New Revision: 810b8fdff92ae8c234041234fdc1a175c3eb1ff9

URL: https://github.com/llvm/llvm-project/commit/810b8fdff92ae8c234041234fdc1a175c3eb1ff9
DIFF: https://github.com/llvm/llvm-project/commit/810b8fdff92ae8c234041234fdc1a175c3eb1ff9.diff

LOG: [X86] Replace unnecessary CVTPD2PI/CVTPD2DQ overrides with better base class defs

Broadwell/Haswell were completely overriding the WriteCvtPD2I class defs - we can remove those overrides entirely by just choosing better class defs.

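Also fixes the scheduling of the YMM folded-load case (vcvtpd2dqy/vcvttpd2dqy with a memory operand), which had no override and was picking up the wrong uop count, latency and port usage - confirmed with Agner + uops.info that the new port usage is correct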

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86SchedBroadwell.td
    llvm/lib/Target/X86/X86SchedHaswell.td
    llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index c6bc7751cb1fc..7156c2ea9d592 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -362,8 +362,8 @@ defm : BWWriteResPair<WriteCvtPS2I,   [BWPort1], 3>;
 defm : BWWriteResPair<WriteCvtPS2IY,  [BWPort1], 3>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
 defm : BWWriteResPair<WriteCvtSD2I,   [BWPort1,BWPort0], 4, [1,1], 2, 5>;
-defm : BWWriteResPair<WriteCvtPD2I,   [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2IY,  [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2I,   [BWPort1,BWPort5], 4, [1,1], 2, 5>;
+defm : BWWriteResPair<WriteCvtPD2IY,  [BWPort1,BWPort5], 6, [1,1], 2, 6>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
 
 defm : BWWriteResPair<WriteCvtI2SS,   [BWPort1], 4>;
@@ -851,12 +851,10 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIrr",
-                                            "MMX_CVT(T?)PS2PIrr",
+def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PS2PIrr",
                                             "(V?)CVTSI642SDrr",
                                             "(V?)CVTSI2SDrr",
-                                            "(V?)CVTSI2SSrr",
-                                            "(V?)CVT(T?)PD2DQrr")>;
+                                            "(V?)CVTSI2SSrr")>;
 
 def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> {
   let Latency = 4;
@@ -968,14 +966,6 @@ def: InstRW<[BWWriteResGroup59], (instrs CVTPS2PDrm, VCVTPS2PDrm,
                                          VPSLLVQrm,
                                          VPSRLVQrm)>;
 
-def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> {
-  let Latency = 6;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup60], (instrs VCVTPD2DQYrr,
-                                         VCVTTPD2DQYrr)>;
-
 def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
   let Latency = 6;
   let NumMicroOps = 2;
@@ -1188,15 +1178,6 @@ def: InstRW<[BWWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
 def: InstRW<[BWWriteResGroup101], (instrs VCVTPS2DQYrm,
                                           VCVTTPS2DQYrm)>;
 
-def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
-  let Latency = 9;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup107], (instrs CVTPD2DQrm, VCVTPD2DQrm,
-                                          CVTTPD2DQrm, VCVTTPD2DQrm)>;
-def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIrm")>;
-
 def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> {
   let Latency = 9;
   let NumMicroOps = 3;

diff  --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 0cd007cdce907..7c5804fc198f2 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -356,9 +356,9 @@ defm : HWWriteResPair<WriteFVarBlendZ, [HWPort5], 2, [2], 2, 7>; // Unsupported
 
 // Conversion between integer and float.
 defm : HWWriteResPair<WriteCvtSD2I,   [HWPort1,HWPort0], 4, [1,1], 2, 5>;
-defm : HWWriteResPair<WriteCvtPD2I,   [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPD2IY,  [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPD2IZ,  [HWPort1], 3>; // Unsupported = 1
+defm : HWWriteResPair<WriteCvtPD2I,   [HWPort1,HWPort5], 4, [1,1], 2, 6>;
+defm : HWWriteResPair<WriteCvtPD2IY,  [HWPort1,HWPort5], 6, [1,1], 2, 6>;
+defm : HWWriteResPair<WriteCvtPD2IZ,  [HWPort1,HWPort5], 6, [1,1], 2, 6>; // Unsupported = 1
 defm : HWWriteResPair<WriteCvtSS2I,   [HWPort1,HWPort0], 4, [1,1], 2, 5>;
 defm : HWWriteResPair<WriteCvtPS2I,   [HWPort1], 3, [1], 1, 6>;
 defm : HWWriteResPair<WriteCvtPS2IY,  [HWPort1], 3, [1], 1, 7>;
@@ -1354,13 +1354,10 @@ def HWWriteResGroup73 : SchedWriteRes<[HWPort1,HWPort5]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPD2PIrr,
-                                         MMX_CVTPS2PIrr,
-                                         MMX_CVTTPD2PIrr,
+def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPS2PIrr,
                                          MMX_CVTTPS2PIrr)>;
 def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTSI(64)?2SDrr",
-                                            "(V?)CVTSI2SSrr",
-                                            "(V?)CVT(T?)PD2DQrr")>;
+                                            "(V?)CVTSI2SSrr")>;
 
 def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
   let Latency = 11;
@@ -1369,16 +1366,6 @@ def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
 }
 def: InstRW<[HWWriteResGroup75], (instregex "FICOM(P?)(16|32)m")>;
 
-def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
-  let Latency = 10;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup78], (instrs CVTPD2DQrm, VCVTPD2DQrm,
-                                         CVTTPD2DQrm, VCVTTPD2DQrm,
-                                         MMX_CVTPD2PIrm,
-                                         MMX_CVTTPD2PIrm)>;
-
 def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
   let Latency = 9;
   let NumMicroOps = 3;
@@ -1479,14 +1466,6 @@ def HWWriteResGroup100 : SchedWriteRes<[HWPort06,HWPort0156]> {
 }
 def: InstRW<[HWWriteResGroup100], (instrs XSETBV)>;
 
-def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> {
-  let Latency = 6;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup102], (instrs VCVTPD2DQYrr,
-                                          VCVTTPD2DQYrr)>;
-
 def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
   let Latency = 13;
   let NumMicroOps = 3;

diff  --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
index 27c6120d84987..c33cc79bd6c18 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
@@ -1125,7 +1125,7 @@ vzeroupper
 # CHECK-NEXT:  2      4     1.00                        vcvtpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vcvtpd2dqx	(%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvtpd2dq	%ymm0, %xmm2
-# CHECK-NEXT:  2      8     1.00    *                   vcvtpd2dqy	(%rax), %xmm2
+# CHECK-NEXT:  3      12    1.00    *                   vcvtpd2dqy	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        vcvtpd2ps	%xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vcvtpd2psx	(%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvtpd2ps	%ymm0, %xmm2
@@ -1161,7 +1161,7 @@ vzeroupper
 # CHECK-NEXT:  2      4     1.00                        vcvttpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vcvttpd2dqx	(%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvttpd2dq	%ymm0, %xmm2
-# CHECK-NEXT:  2      8     1.00    *                   vcvttpd2dqy	(%rax), %xmm2
+# CHECK-NEXT:  3      12    1.00    *                   vcvttpd2dqy	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcvttps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *                   vcvttps2dq	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcvttps2dq	%ymm0, %ymm2
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     257.00 215.25 235.25 176.17 176.17 38.00  430.25 2.25   12.67
+# CHECK-NEXT:  -     257.00 215.25 235.25 176.17 176.17 38.00  432.25 2.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1835,7 +1835,7 @@ vzeroupper
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2dqx	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2dq	%ymm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtpd2dqy	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2dqy	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2psx	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2ps	%ymm0, %xmm2
@@ -1871,7 +1871,7 @@ vzeroupper
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvttpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvttpd2dqx	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvttpd2dq	%ymm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvttpd2dqy	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvttpd2dqy	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvttps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvttps2dq	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvttps2dq	%ymm0, %ymm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
index ea7d251ffccef..3da547de54e36 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
@@ -1125,7 +1125,7 @@ vzeroupper
 # CHECK-NEXT:  2      4     1.00                        vcvtpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   vcvtpd2dqx	(%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvtpd2dq	%ymm0, %xmm2
-# CHECK-NEXT:  2      8     1.00    *                   vcvtpd2dqy	(%rax), %xmm2
+# CHECK-NEXT:  3      12    1.00    *                   vcvtpd2dqy	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        vcvtpd2ps	%xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   vcvtpd2psx	(%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvtpd2ps	%ymm0, %xmm2
@@ -1161,7 +1161,7 @@ vzeroupper
 # CHECK-NEXT:  2      4     1.00                        vcvttpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   vcvttpd2dqx	(%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvttpd2dq	%ymm0, %xmm2
-# CHECK-NEXT:  2      8     1.00    *                   vcvttpd2dqy	(%rax), %xmm2
+# CHECK-NEXT:  3      12    1.00    *                   vcvttpd2dqy	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcvttps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  2      9     1.00    *                   vcvttps2dq	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcvttps2dq	%ymm0, %ymm2
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     336.00 214.58 236.58 176.17 176.17 38.00  433.58 2.25   12.67
+# CHECK-NEXT:  -     336.00 214.58 236.58 176.17 176.17 38.00  435.58 2.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1835,7 +1835,7 @@ vzeroupper
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2dqx	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2dq	%ymm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtpd2dqy	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2dqy	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2psx	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2ps	%ymm0, %xmm2
@@ -1871,7 +1871,7 @@ vzeroupper
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvttpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvttpd2dqx	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvttpd2dq	%ymm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvttpd2dqy	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvttpd2dqy	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvttps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvttps2dq	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvttps2dq	%ymm0, %ymm2


        


More information about the llvm-commits mailing list