[llvm] e381d8b - [X86][Atom] Fix (U)COMISS/SD uops, latency and throughput

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 19 05:02:42 PDT 2021


Author: Simon Pilgrim
Date: 2021-09-19T12:44:44+01:00
New Revision: e381d8b24329cae6408205f74d0d6d9eaa6b29cf

URL: https://github.com/llvm/llvm-project/commit/e381d8b24329cae6408205f74d0d6d9eaa6b29cf
DIFF: https://github.com/llvm/llvm-project/commit/e381d8b24329cae6408205f74d0d6d9eaa6b29cf.diff

LOG: [X86][Atom] Fix (U)COMISS/SD uops, latency and throughput

Both ports are required for the reg and mem variants. We can also use the WriteFComX class directly and remove the unnecessary InstRW overrides. This matches what Intel AoM / Agner / InstLatX64 report as well.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleAtom.td
    llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
    llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index e9020f54f2227..4c6a9397ea1fa 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -249,13 +249,13 @@ defm : AtomWriteResPair<WriteFCmp64X,        [AtomPort0,AtomPort1], [AtomPort0,A
 defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
 defm : AtomWriteResPair<WriteFCom,           [AtomPort0],  [AtomPort0],  5,  5,  [5],  [5]>;
-defm : AtomWriteResPair<WriteFComX,          [AtomPort0],  [AtomPort0],  5,  5,  [5],  [5]>;
+defm : AtomWriteResPair<WriteFComX,          [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  9, 10,  [9,9],[10,10], 4, 5>;
 defm : AtomWriteResPair<WriteFMul,           [AtomPort0],  [AtomPort0],  4,  4,  [2],  [2]>;
 defm : AtomWriteResPair<WriteFMulX,          [AtomPort0],  [AtomPort0],  5,  5,  [2],  [2]>;
 defm : X86WriteResPairUnsupported<WriteFMulY>;
 defm : X86WriteResPairUnsupported<WriteFMulZ>;
 defm : AtomWriteResPair<WriteFMul64,         [AtomPort0],  [AtomPort0],  5,  5,  [2],  [2]>;
-defm : AtomWriteResPair<WriteFMul64X,        [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  9, 10,  [9,9],  [10,10]>;
+defm : AtomWriteResPair<WriteFMul64X,        [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  9, 10,  [9,9],[10,10]>;
 defm : X86WriteResPairUnsupported<WriteFMul64Y>;
 defm : X86WriteResPairUnsupported<WriteFMul64Z>;
 defm : AtomWriteResPair<WriteFRcp,           [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
@@ -648,7 +648,6 @@ def : InstRW<[AtomWrite01_9], (instrs POPA16, POPA32,
                                       SHLD64rri8, SHRD64rri8,
                                       CMPXCHG8rr)>;
 def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
-                                         "(U)?COMIS(D|S)rr",
                                          "CVT(T)?SS2SI64rr(_Int)?")>;
 
 def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
@@ -656,8 +655,7 @@ def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
   let ResourceCycles = [10];
 }
 def : SchedAlias<WriteFLDC, AtomWrite01_10>;
-def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
-                                          "CVT(T)?SS2SI64rm(_Int)?")>;
+def : InstRW<[AtomWrite01_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>;
 
 def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
   let Latency = 11;

diff  --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
index fb6aba02155f1..a75c2bb5a86d2 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
@@ -206,8 +206,8 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  1      7     6.00    *                   cmpeqps	(%rax), %xmm2
 # CHECK-NEXT:  1      5     1.00                        cmpeqss	%xmm0, %xmm2
 # CHECK-NEXT:  1      5     1.00    *                   cmpeqss	(%rax), %xmm2
-# CHECK-NEXT:  1      9     4.50                        comiss	%xmm0, %xmm1
-# CHECK-NEXT:  1      10    5.00    *                   comiss	(%rax), %xmm1
+# CHECK-NEXT:  4      9     9.00                        comiss	%xmm0, %xmm1
+# CHECK-NEXT:  5      10    10.00   *                   comiss	(%rax), %xmm1
 # CHECK-NEXT:  1      5     5.00                        cvtpi2ps	%mm0, %xmm2
 # CHECK-NEXT:  1      5     5.00    *                   cvtpi2ps	(%rax), %xmm2
 # CHECK-NEXT:  1      5     5.00                        cvtps2pi	%xmm0, %mm2
@@ -310,8 +310,8 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  1      5     1.00    *                   subps	(%rax), %xmm2
 # CHECK-NEXT:  1      5     1.00                        subss	%xmm0, %xmm2
 # CHECK-NEXT:  1      5     1.00    *                   subss	(%rax), %xmm2
-# CHECK-NEXT:  1      9     4.50                        ucomiss	%xmm0, %xmm1
-# CHECK-NEXT:  1      10    5.00    *                   ucomiss	(%rax), %xmm1
+# CHECK-NEXT:  4      9     9.00                        ucomiss	%xmm0, %xmm1
+# CHECK-NEXT:  5      10    10.00   *                   ucomiss	(%rax), %xmm1
 # CHECK-NEXT:  1      1     1.00                        unpckhps	%xmm0, %xmm2
 # CHECK-NEXT:  1      1     1.00    *                   unpckhps	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        unpcklps	%xmm0, %xmm2
@@ -325,7 +325,7 @@ xorps       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]
-# CHECK-NEXT: 456.00 411.00
+# CHECK-NEXT: 475.00 430.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    Instructions:
@@ -341,8 +341,8 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT: 6.00   6.00   cmpeqps	(%rax), %xmm2
 # CHECK-NEXT:  -     1.00   cmpeqss	%xmm0, %xmm2
 # CHECK-NEXT: 1.00   1.00   cmpeqss	(%rax), %xmm2
-# CHECK-NEXT: 4.50   4.50   comiss	%xmm0, %xmm1
-# CHECK-NEXT: 5.00   5.00   comiss	(%rax), %xmm1
+# CHECK-NEXT: 9.00   9.00   comiss	%xmm0, %xmm1
+# CHECK-NEXT: 10.00  10.00  comiss	(%rax), %xmm1
 # CHECK-NEXT:  -     5.00   cvtpi2ps	%mm0, %xmm2
 # CHECK-NEXT:  -     5.00   cvtpi2ps	(%rax), %xmm2
 # CHECK-NEXT:  -     5.00   cvtps2pi	%xmm0, %mm2
@@ -445,8 +445,8 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT: 1.00   1.00   subps	(%rax), %xmm2
 # CHECK-NEXT:  -     1.00   subss	%xmm0, %xmm2
 # CHECK-NEXT: 1.00   1.00   subss	(%rax), %xmm2
-# CHECK-NEXT: 4.50   4.50   ucomiss	%xmm0, %xmm1
-# CHECK-NEXT: 5.00   5.00   ucomiss	(%rax), %xmm1
+# CHECK-NEXT: 9.00   9.00   ucomiss	%xmm0, %xmm1
+# CHECK-NEXT: 10.00  10.00  ucomiss	(%rax), %xmm1
 # CHECK-NEXT: 1.00    -     unpckhps	%xmm0, %xmm2
 # CHECK-NEXT: 1.00    -     unpckhps	(%rax), %xmm2
 # CHECK-NEXT: 1.00    -     unpcklps	%xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
index 2cd1d8d93c0c7..525ba77cdaab0 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
@@ -420,8 +420,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      7     6.00    *                   cmpeqpd	(%rax), %xmm2
 # CHECK-NEXT:  1      5     1.00                        cmpeqsd	%xmm0, %xmm2
 # CHECK-NEXT:  1      5     1.00    *                   cmpeqsd	(%rax), %xmm2
-# CHECK-NEXT:  1      9     4.50                        comisd	%xmm0, %xmm1
-# CHECK-NEXT:  1      10    5.00    *                   comisd	(%rax), %xmm1
+# CHECK-NEXT:  4      9     9.00                        comisd	%xmm0, %xmm1
+# CHECK-NEXT:  5      10    10.00   *                   comisd	(%rax), %xmm1
 # CHECK-NEXT:  1      7     6.00                        cvtdq2pd	%xmm0, %xmm2
 # CHECK-NEXT:  1      8     7.00    *                   cvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  1      6     5.00                        cvtdq2ps	%xmm0, %xmm2
@@ -666,8 +666,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      7     6.00    *                   subpd	(%rax), %xmm2
 # CHECK-NEXT:  1      5     1.00                        subsd	%xmm0, %xmm2
 # CHECK-NEXT:  1      5     1.00    *                   subsd	(%rax), %xmm2
-# CHECK-NEXT:  1      9     4.50                        ucomisd	%xmm0, %xmm1
-# CHECK-NEXT:  1      10    5.00    *                   ucomisd	(%rax), %xmm1
+# CHECK-NEXT:  4      9     9.00                        ucomisd	%xmm0, %xmm1
+# CHECK-NEXT:  5      10    10.00   *                   ucomisd	(%rax), %xmm1
 # CHECK-NEXT:  1      1     1.00                        unpckhpd	%xmm0, %xmm2
 # CHECK-NEXT:  1      1     1.00    *                   unpckhpd	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        unpcklpd	%xmm0, %xmm2
@@ -681,7 +681,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]
-# CHECK-NEXT: 913.00 773.00
+# CHECK-NEXT: 932.00 792.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    Instructions:
@@ -698,8 +698,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT: 6.00   6.00   cmpeqpd	(%rax), %xmm2
 # CHECK-NEXT:  -     1.00   cmpeqsd	%xmm0, %xmm2
 # CHECK-NEXT: 1.00   1.00   cmpeqsd	(%rax), %xmm2
-# CHECK-NEXT: 4.50   4.50   comisd	%xmm0, %xmm1
-# CHECK-NEXT: 5.00   5.00   comisd	(%rax), %xmm1
+# CHECK-NEXT: 9.00   9.00   comisd	%xmm0, %xmm1
+# CHECK-NEXT: 10.00  10.00  comisd	(%rax), %xmm1
 # CHECK-NEXT: 6.00   6.00   cvtdq2pd	%xmm0, %xmm2
 # CHECK-NEXT: 7.00   7.00   cvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT: 5.00   5.00   cvtdq2ps	%xmm0, %xmm2
@@ -944,8 +944,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT: 6.00   6.00   subpd	(%rax), %xmm2
 # CHECK-NEXT:  -     1.00   subsd	%xmm0, %xmm2
 # CHECK-NEXT: 1.00   1.00   subsd	(%rax), %xmm2
-# CHECK-NEXT: 4.50   4.50   ucomisd	%xmm0, %xmm1
-# CHECK-NEXT: 5.00   5.00   ucomisd	(%rax), %xmm1
+# CHECK-NEXT: 9.00   9.00   ucomisd	%xmm0, %xmm1
+# CHECK-NEXT: 10.00  10.00  ucomisd	(%rax), %xmm1
 # CHECK-NEXT: 1.00    -     unpckhpd	%xmm0, %xmm2
 # CHECK-NEXT: 1.00    -     unpckhpd	(%rax), %xmm2
 # CHECK-NEXT: 1.00    -     unpcklpd	%xmm0, %xmm2


        


More information about the llvm-commits mailing list