[llvm] edf8855 - [X86] Replace unnecessary int2float and float2double overrides with better base class defs

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 5 12:07:09 PDT 2022


Author: Simon Pilgrim
Date: 2022-11-05T19:07:01Z
New Revision: edf885531e9e38fb127f6075373b706acef7b59c

URL: https://github.com/llvm/llvm-project/commit/edf885531e9e38fb127f6075373b706acef7b59c
DIFF: https://github.com/llvm/llvm-project/commit/edf885531e9e38fb127f6075373b706acef7b59c.diff

LOG: [X86] Replace unnecessary int2float and float2double overrides with better base class defs

Broadwell/Haswell were completely overriding the class defs - we can remove those overrides entirely by just choosing better class defs (plus a fix for a missing MMX folded load).

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86SchedBroadwell.td
    llvm/lib/Target/X86/X86SchedHaswell.td
    llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 003a4e376adb..d4ffdea79c5c 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -367,17 +367,17 @@ defm : BWWriteResPair<WriteCvtPD2IY,  [BWPort1], 3>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
 
 defm : BWWriteResPair<WriteCvtI2SS,   [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PS,   [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PSY,  [BWPort1], 4>;
+defm : BWWriteResPair<WriteCvtI2PS,   [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtI2PSY,  [BWPort1], 3, [1], 1, 6>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
 defm : BWWriteResPair<WriteCvtI2SD,   [BWPort1], 4>;
 defm : BWWriteResPair<WriteCvtI2PD,   [BWPort1,BWPort5], 4, [1,1], 2, 5>;
 defm : BWWriteResPair<WriteCvtI2PDY,  [BWPort1,BWPort5], 6, [1,1], 2, 5>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
 
-defm : BWWriteResPair<WriteCvtSS2SD,  [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2PD,  [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtSS2SD,  [BWPort0,BWPort5], 2, [1,1], 2, 5>;
+defm : BWWriteResPair<WriteCvtPS2PD,  [BWPort0,BWPort5], 2, [1,1], 2, 5>;
+defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort0,BWPort5], 4, [1,1], 2, 5>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
 defm : BWWriteResPair<WriteCvtSD2SS,  [BWPort1,BWPort5], 4, [1,1], 2, 5>;
 defm : BWWriteResPair<WriteCvtPD2PS,  [BWPort1,BWPort5], 4, [1,1], 2, 5>;
@@ -715,14 +715,6 @@ def: InstRW<[BWWriteResGroup14], (instrs LFENCE,
                                          WAIT,
                                          XGETBV)>;
 
-def BWWriteResGroup15 : SchedWriteRes<[BWPort0,BWPort5]> {
-  let Latency = 2;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup15], (instregex "(V?)CVTPS2PDrr",
-                                            "(V?)CVTSS2SDrr")>;
-
 def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> {
   let Latency = 2;
   let NumMicroOps = 2;
@@ -784,9 +776,7 @@ def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[BWWriteResGroup27], (instrs MMX_CVTPI2PSrr)>;
-def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr",
-                                            "(V?)CVTDQ2PS(Y?)rr")>;
+def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr")>;
 
 def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
   let Latency = 3;
@@ -858,13 +848,6 @@ def: InstRW<[BWWriteResGroup39], (instregex "(V?)CVT(T?)SD2SI64rr",
                                             "(V?)CVT(T?)SS2SI64rr",
                                             "(V?)CVT(T?)SS2SIrr")>;
 
-def BWWriteResGroup40 : SchedWriteRes<[BWPort0,BWPort5]> {
-  let Latency = 4;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup40], (instrs VCVTPS2PDYrr)>;
-
 def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> {
   let Latency = 4;
   let NumMicroOps = 2;
@@ -1164,9 +1147,6 @@ def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[BWWriteResGroup91], (instrs MMX_CVTPI2PSrm,
-                                         CVTDQ2PSrm,
-                                         VCVTDQ2PSrm)>;
 def: InstRW<[BWWriteResGroup91], (instregex "P(DEP|EXT)(32|64)rm")>;
 
 def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> {
@@ -1230,13 +1210,6 @@ def: InstRW<[BWWriteResGroup105], (instregex "(V?)CVTSS2SI(64)?rm",
                                              "VCVTTSS2SI64rm",
                                              "(V?)CVTTSS2SIrm")>;
 
-def BWWriteResGroup106 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
-  let Latency = 9;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup106], (instrs VCVTPS2PDYrm)>;
-
 def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
   let Latency = 9;
   let NumMicroOps = 3;

diff  --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 6cff9c30ee16..13b0ed25361e 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -360,23 +360,23 @@ defm : HWWriteResPair<WriteCvtPD2I,   [HWPort1], 3>;
 defm : HWWriteResPair<WriteCvtPD2IY,  [HWPort1], 3>;
 defm : HWWriteResPair<WriteCvtPD2IZ,  [HWPort1], 3>; // Unsupported = 1
 defm : HWWriteResPair<WriteCvtSS2I,   [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2I,   [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2IY,  [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2IZ,  [HWPort1], 3>; // Unsupported = 1
+defm : HWWriteResPair<WriteCvtPS2I,   [HWPort1], 3, [1], 1, 6>;
+defm : HWWriteResPair<WriteCvtPS2IY,  [HWPort1], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteCvtPS2IZ,  [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
 
 defm : HWWriteResPair<WriteCvtI2SD,   [HWPort1], 4>;
 defm : HWWriteResPair<WriteCvtI2PD,   [HWPort1,HWPort5], 4, [1,1], 2, 6>;
 defm : HWWriteResPair<WriteCvtI2PDY,  [HWPort1,HWPort5], 6, [1,1], 2, 6>;
 defm : HWWriteResPair<WriteCvtI2PDZ,  [HWPort1,HWPort5], 6, [1,1], 2, 6>; // Unsupported = 1
 defm : HWWriteResPair<WriteCvtI2SS,   [HWPort1], 4>;
-defm : HWWriteResPair<WriteCvtI2PS,   [HWPort1], 4>;
-defm : HWWriteResPair<WriteCvtI2PSY,  [HWPort1], 4>;
-defm : HWWriteResPair<WriteCvtI2PSZ,  [HWPort1], 4>; // Unsupported = 1
-
-defm : HWWriteResPair<WriteCvtSS2SD,  [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2PD,  [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPS2PDZ, [HWPort1], 3>; // Unsupported = 1
+defm : HWWriteResPair<WriteCvtI2PS,   [HWPort1], 3, [1], 1, 6>;
+defm : HWWriteResPair<WriteCvtI2PSY,  [HWPort1], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteCvtI2PSZ,  [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
+
+defm : HWWriteResPair<WriteCvtSS2SD,  [HWPort0,HWPort5], 2, [1,1], 2, 5>;
+defm : HWWriteResPair<WriteCvtPS2PD,  [HWPort0,HWPort5], 2, [1,1], 2, 5>;
+defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort0,HWPort5], 4, [1,1], 2, 6>;
+defm : HWWriteResPair<WriteCvtPS2PDZ, [HWPort0,HWPort5], 4, [1,1], 2, 6>; // Unsupported = 1
 defm : HWWriteResPair<WriteCvtSD2SS,  [HWPort1,HWPort5], 4, [1,1], 2, 5>;
 defm : HWWriteResPair<WriteCvtPD2PS,  [HWPort1,HWPort5], 4, [1,1], 2, 6>;
 defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
@@ -1131,14 +1131,6 @@ def: InstRW<[HWWriteResGroup30], (instrs LFENCE,
                                          WAIT,
                                          XGETBV)>;
 
-def HWWriteResGroup31 : SchedWriteRes<[HWPort0,HWPort5]> {
-  let Latency = 2;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup31], (instregex "(V?)CVTPS2PDrr",
-                                            "(V?)CVTSS2SDrr")>;
-
 def HWWriteResGroup32 : SchedWriteRes<[HWPort6,HWPort0156]> {
   let Latency = 2;
   let NumMicroOps = 2;
@@ -1241,9 +1233,7 @@ def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[HWWriteResGroup50], (instrs MMX_CVTPI2PSrr)>;
-def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr",
-                                            "(V?)CVTDQ2PS(Y?)rr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr")>;
 
 def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> {
   let Latency = 3;
@@ -1267,8 +1257,7 @@ def HWWriteResGroup52_1 : SchedWriteRes<[HWPort1,HWPort23]> {
 }
 def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
                                               "ILD_F(16|32|64)m")>;
-def: InstRW<[HWWriteResGroup52_1], (instrs VCVTDQ2PSYrm,
-                                           VCVTPS2DQYrm,
+def: InstRW<[HWWriteResGroup52_1], (instrs VCVTPS2DQYrm,
                                            VCVTTPS2DQYrm)>;
 
 def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> {
@@ -1369,13 +1358,6 @@ def HWWriteResGroup70 : SchedWriteRes<[HWPort0,HWPort1]> {
 def: InstRW<[HWWriteResGroup70], (instregex "(V?)CVT(T?)SD2SI(64)?rr",
                                             "(V?)CVT(T?)SS2SI(64)?rr")>;
 
-def HWWriteResGroup71 : SchedWriteRes<[HWPort0,HWPort5]> {
-  let Latency = 4;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup71], (instrs VCVTPS2PDYrr)>;
-
 def HWWriteResGroup72 : SchedWriteRes<[HWPort0,HWPort0156]> {
   let Latency = 4;
   let NumMicroOps = 2;
@@ -1414,13 +1396,6 @@ def: InstRW<[HWWriteResGroup76], (instregex "(V?)CVTSD2SI(64)?rm",
                                             "VCVTTSS2SI64rm",
                                             "(V?)CVTTSS2SIrm")>;
 
-def HWWriteResGroup77 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
-  let Latency = 10;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup77], (instrs VCVTPS2PDYrm)>;
-
 def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
   let Latency = 10;
   let NumMicroOps = 3;

diff  --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s
index 17203584ea3f..a79a47724f60 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s
@@ -211,7 +211,7 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        cvtpi2ps	%mm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *                   cvtpi2ps	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        cvtps2pi	%xmm0, %mm2
-# CHECK-NEXT:  2      8     1.00    *                   cvtps2pi	(%rax), %mm2
+# CHECK-NEXT:  2      9     1.00    *                   cvtps2pi	(%rax), %mm2
 # CHECK-NEXT:  2      4     1.00                        cvtsi2ss	%ecx, %xmm2
 # CHECK-NEXT:  3      5     2.00                        cvtsi2ss	%rcx, %xmm2
 # CHECK-NEXT:  2      9     1.00    *                   cvtsi2ssl	(%rax), %xmm2
@@ -221,7 +221,7 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   cvtss2si	(%rax), %ecx
 # CHECK-NEXT:  3      9     1.00    *                   cvtss2si	(%rax), %rcx
 # CHECK-NEXT:  2      4     1.00                        cvttps2pi	%xmm0, %mm2
-# CHECK-NEXT:  2      8     1.00    *                   cvttps2pi	(%rax), %mm2
+# CHECK-NEXT:  2      9     1.00    *                   cvttps2pi	(%rax), %mm2
 # CHECK-NEXT:  2      4     1.00                        cvttss2si	%xmm0, %ecx
 # CHECK-NEXT:  2      4     1.00                        cvttss2si	%xmm0, %rcx
 # CHECK-NEXT:  3      9     1.00    *                   cvttss2si	(%rax), %ecx


        


More information about the llvm-commits mailing list