[llvm] 9ad5969 - [X86][Atom] Fix CVT uops + port usage

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 12 14:59:53 PST 2021


Author: Simon Pilgrim
Date: 2021-12-12T22:57:53Z
New Revision: 9ad5969b5e07e24d14de575533efccfc4162e043

URL: https://github.com/llvm/llvm-project/commit/9ad5969b5e07e24d14de575533efccfc4162e043
DIFF: https://github.com/llvm/llvm-project/commit/9ad5969b5e07e24d14de575533efccfc4162e043.diff

LOG: [X86][Atom] Fix CVT uops + port usage

Fix the overrides to use both ports. Update the uop counts and port usage based on the most recent llvm-exegesis captures (PR36895), as well as on what the Intel AoM and Agner report.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleAtom.td
    llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
    llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index 292dbbb108e8e..0fedfc01092c8 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -320,30 +320,30 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
 // Conversions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : AtomWriteResPair<WriteCvtSS2I,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  8,  9,  [7,7],  [6,6]>;
-defm : AtomWriteResPair<WriteCvtPS2I,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  6,  7,  [5,5],  [6,6]>;
+defm : AtomWriteResPair<WriteCvtSS2I,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  8,  9,  [8,8],  [9,9], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPS2I,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  6,  7,  [6,6],  [7,7], 3, 4>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2IY>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
-defm : AtomWriteResPair<WriteCvtSD2I,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  8,  9,  [7,7],  [6,6]>;
-defm : AtomWriteResPair<WriteCvtPD2I,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  7,  8,  [6,6],  [7,7]>;
+defm : AtomWriteResPair<WriteCvtSD2I,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  8,  9,  [8,8],[10,10], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPD2I,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  7,  8,  [7,7],  [8,8], 4, 5>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2IY>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
 
-defm : AtomWriteResPair<WriteCvtI2SS,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  6,  7,  [5,5],  [6,6]>;
-defm : AtomWriteResPair<WriteCvtI2PS,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  6,  7,  [5,5],  [6,6]>;
+defm : AtomWriteResPair<WriteCvtI2SS,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  6,  7,  [6,6],  [6,6], 3, 1>;
+defm : AtomWriteResPair<WriteCvtI2PS,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  6,  7,  [6,6],  [7,7], 3, 4>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PSY>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
-defm : AtomWriteResPair<WriteCvtI2SD,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  6,  7,  [5,5],  [6,6]>;
-defm : AtomWriteResPair<WriteCvtI2PD,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  7,  8,  [6,6],  [7,7]>;
+defm : AtomWriteResPair<WriteCvtI2SD,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  6,  7,  [6,6],  [7,7], 3, 3>;
+defm : AtomWriteResPair<WriteCvtI2PD,   [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  7,  8,  [6,6],  [7,7], 3, 4>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PDY>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
 
-defm : AtomWriteResPair<WriteCvtSS2SD,  [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  6,  7,  [5,5],  [6,6]>;
-defm : AtomWriteResPair<WriteCvtPS2PD,  [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  7,  8,  [6,6],  [7,7]>;
+defm : AtomWriteResPair<WriteCvtSS2SD,  [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  6,  7,  [6,6],  [7,7], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPS2PD,  [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  7,  8,  [6,6],  [7,7], 4, 5>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2PDY>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
-defm : AtomWriteResPair<WriteCvtSD2SS,  [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  6,  7,  [5,5],  [6,6]>;
-defm : AtomWriteResPair<WriteCvtPD2PS,  [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],  7,  8,  [6,6],  [7,7]>;
+defm : AtomWriteResPair<WriteCvtSD2SS,  [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 10, 11,[10,10],[12,12], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPD2PS,  [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 11, 12,[11,11],[12,12], 4, 5>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2PSY>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
 
@@ -550,6 +550,40 @@ def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
 def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIrm, MMX_CVTTPS2PIrm)>;
 def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
 
+def AtomWrite0_1_7 : SchedWriteRes<[AtomPort0,AtomPort1]> {
+  let Latency = 7;
+  let ResourceCycles = [6,6];
+}
+def : InstRW<[AtomWrite0_1_7], (instregex "CVTSI642SDrm(_Int)?")>;
+
+def AtomWrite0_1_7_4 : SchedWriteRes<[AtomPort0,AtomPort1]> {
+  let Latency = 7;
+  let ResourceCycles = [8,8];
+  let NumMicroOps = 4;
+}
+def : InstRW<[AtomWrite0_1_7_4], (instregex "CVTSI642SSrr(_Int)?")>;
+
+def AtomWrite0_1_8_4 : SchedWriteRes<[AtomPort0,AtomPort1]> {
+  let Latency = 8;
+  let ResourceCycles = [8,8];
+  let NumMicroOps = 4;
+}
+def : InstRW<[AtomWrite0_1_7_4], (instregex "CVTSI642SSrm(_Int)?")>;
+
+def AtomWrite0_1_9 : SchedWriteRes<[AtomPort0,AtomPort1]> {
+  let Latency = 9;
+  let ResourceCycles = [9,9];
+  let NumMicroOps = 4;
+}
+def : InstRW<[AtomWrite0_1_9], (instregex "CVT(T)?SS2SI64rr(_Int)?")>;
+
+def AtomWrite0_1_10 : SchedWriteRes<[AtomPort0,AtomPort1]> {
+  let Latency = 10;
+  let ResourceCycles = [11,11];
+  let NumMicroOps = 5;
+}
+def : InstRW<[AtomWrite0_1_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>;
+
 // Port0 or Port1
 def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
   let Latency = 1;
@@ -647,15 +681,13 @@ def : InstRW<[AtomWrite01_9], (instrs POPA16, POPA32,
                                       SHLD64mri8, SHRD64mri8,
                                       SHLD64rri8, SHRD64rri8,
                                       CMPXCHG8rr)>;
-def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
-                                         "CVT(T)?SS2SI64rr(_Int)?")>;
+def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F")>;
 
 def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
   let Latency = 10;
   let ResourceCycles = [10];
 }
 def : SchedAlias<WriteFLDC, AtomWrite01_10>;
-def : InstRW<[AtomWrite01_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>;
 
 def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
   let Latency = 11;

diff  --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
index 97cee7d01bffe..25011cd1aea89 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
@@ -212,20 +212,20 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  1      5     5.00    *                   cvtpi2ps	(%rax), %xmm2
 # CHECK-NEXT:  1      5     5.00                        cvtps2pi	%xmm0, %mm2
 # CHECK-NEXT:  1      5     5.00    *                   cvtps2pi	(%rax), %mm2
-# CHECK-NEXT:  1      6     5.00                        cvtsi2ss	%ecx, %xmm2
-# CHECK-NEXT:  1      6     5.00                        cvtsi2ss	%rcx, %xmm2
+# CHECK-NEXT:  3      6     6.00                        cvtsi2ss	%ecx, %xmm2
+# CHECK-NEXT:  4      7     8.00                        cvtsi2ss	%rcx, %xmm2
 # CHECK-NEXT:  1      7     6.00    *                   cvtsi2ssl	(%rax), %xmm2
-# CHECK-NEXT:  1      7     6.00    *                   cvtsi2ssq	(%rax), %xmm2
-# CHECK-NEXT:  1      8     7.00                        cvtss2si	%xmm0, %ecx
-# CHECK-NEXT:  1      9     4.50                        cvtss2si	%xmm0, %rcx
-# CHECK-NEXT:  1      9     6.00    *                   cvtss2si	(%rax), %ecx
-# CHECK-NEXT:  1      10    5.00    *                   cvtss2si	(%rax), %rcx
+# CHECK-NEXT:  4      7     8.00    *                   cvtsi2ssq	(%rax), %xmm2
+# CHECK-NEXT:  3      8     8.00                        cvtss2si	%xmm0, %ecx
+# CHECK-NEXT:  4      9     9.00                        cvtss2si	%xmm0, %rcx
+# CHECK-NEXT:  4      9     9.00    *                   cvtss2si	(%rax), %ecx
+# CHECK-NEXT:  5      10    11.00   *                   cvtss2si	(%rax), %rcx
 # CHECK-NEXT:  1      5     5.00                        cvttps2pi	%xmm0, %mm2
 # CHECK-NEXT:  1      5     5.00    *                   cvttps2pi	(%rax), %mm2
-# CHECK-NEXT:  1      8     7.00                        cvttss2si	%xmm0, %ecx
-# CHECK-NEXT:  1      9     4.50                        cvttss2si	%xmm0, %rcx
-# CHECK-NEXT:  1      9     6.00    *                   cvttss2si	(%rax), %ecx
-# CHECK-NEXT:  1      10    5.00    *                   cvttss2si	(%rax), %rcx
+# CHECK-NEXT:  3      8     8.00                        cvttss2si	%xmm0, %ecx
+# CHECK-NEXT:  4      9     9.00                        cvttss2si	%xmm0, %rcx
+# CHECK-NEXT:  4      9     9.00    *                   cvttss2si	(%rax), %ecx
+# CHECK-NEXT:  5      10    11.00   *                   cvttss2si	(%rax), %rcx
 # CHECK-NEXT:  6      70    70.00                       divps	%xmm0, %xmm2
 # CHECK-NEXT:  7      70    70.00   *                   divps	(%rax), %xmm2
 # CHECK-NEXT:  3      34    34.00                       divss	%xmm0, %xmm2
@@ -325,7 +325,7 @@ xorps       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]
-# CHECK-NEXT: 712.00 667.00
+# CHECK-NEXT: 747.00 702.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    Instructions:
@@ -347,20 +347,20 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  -     5.00   cvtpi2ps	(%rax), %xmm2
 # CHECK-NEXT:  -     5.00   cvtps2pi	%xmm0, %mm2
 # CHECK-NEXT: 5.00   5.00   cvtps2pi	(%rax), %mm2
-# CHECK-NEXT: 5.00   5.00   cvtsi2ss	%ecx, %xmm2
-# CHECK-NEXT: 5.00   5.00   cvtsi2ss	%rcx, %xmm2
+# CHECK-NEXT: 6.00   6.00   cvtsi2ss	%ecx, %xmm2
+# CHECK-NEXT: 8.00   8.00   cvtsi2ss	%rcx, %xmm2
 # CHECK-NEXT: 6.00   6.00   cvtsi2ssl	(%rax), %xmm2
-# CHECK-NEXT: 6.00   6.00   cvtsi2ssq	(%rax), %xmm2
-# CHECK-NEXT: 7.00   7.00   cvtss2si	%xmm0, %ecx
-# CHECK-NEXT: 4.50   4.50   cvtss2si	%xmm0, %rcx
-# CHECK-NEXT: 6.00   6.00   cvtss2si	(%rax), %ecx
-# CHECK-NEXT: 5.00   5.00   cvtss2si	(%rax), %rcx
+# CHECK-NEXT: 8.00   8.00   cvtsi2ssq	(%rax), %xmm2
+# CHECK-NEXT: 8.00   8.00   cvtss2si	%xmm0, %ecx
+# CHECK-NEXT: 9.00   9.00   cvtss2si	%xmm0, %rcx
+# CHECK-NEXT: 9.00   9.00   cvtss2si	(%rax), %ecx
+# CHECK-NEXT: 11.00  11.00  cvtss2si	(%rax), %rcx
 # CHECK-NEXT:  -     5.00   cvttps2pi	%xmm0, %mm2
 # CHECK-NEXT: 5.00   5.00   cvttps2pi	(%rax), %mm2
-# CHECK-NEXT: 7.00   7.00   cvttss2si	%xmm0, %ecx
-# CHECK-NEXT: 4.50   4.50   cvttss2si	%xmm0, %rcx
-# CHECK-NEXT: 6.00   6.00   cvttss2si	(%rax), %ecx
-# CHECK-NEXT: 5.00   5.00   cvttss2si	(%rax), %rcx
+# CHECK-NEXT: 8.00   8.00   cvttss2si	%xmm0, %ecx
+# CHECK-NEXT: 9.00   9.00   cvttss2si	%xmm0, %rcx
+# CHECK-NEXT: 9.00   9.00   cvttss2si	(%rax), %ecx
+# CHECK-NEXT: 11.00  11.00  cvttss2si	(%rax), %rcx
 # CHECK-NEXT: 70.00  70.00  divps	%xmm0, %xmm2
 # CHECK-NEXT: 70.00  70.00  divps	(%rax), %xmm2
 # CHECK-NEXT: 34.00  34.00  divss	%xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
index a9d0c81c99d6a..82d40dca00f7c 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
@@ -422,44 +422,44 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      5     1.00    *                   cmpeqsd	(%rax), %xmm2
 # CHECK-NEXT:  4      9     9.00                        comisd	%xmm0, %xmm1
 # CHECK-NEXT:  5      10    10.00   *                   comisd	(%rax), %xmm1
-# CHECK-NEXT:  1      7     6.00                        cvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  1      8     7.00    *                   cvtdq2pd	(%rax), %xmm2
-# CHECK-NEXT:  1      6     5.00                        cvtdq2ps	%xmm0, %xmm2
-# CHECK-NEXT:  1      7     6.00    *                   cvtdq2ps	(%rax), %xmm2
-# CHECK-NEXT:  1      7     6.00                        cvtpd2dq	%xmm0, %xmm2
-# CHECK-NEXT:  1      8     7.00    *                   cvtpd2dq	(%rax), %xmm2
-# CHECK-NEXT:  1      7     6.00                        cvtpd2pi	%xmm0, %mm2
-# CHECK-NEXT:  1      8     7.00    *                   cvtpd2pi	(%rax), %mm2
-# CHECK-NEXT:  1      7     6.00                        cvtpd2ps	%xmm0, %xmm2
-# CHECK-NEXT:  1      8     7.00    *                   cvtpd2ps	(%rax), %xmm2
-# CHECK-NEXT:  1      7     6.00                        cvtpi2pd	%mm0, %xmm2
-# CHECK-NEXT:  1      8     7.00    *                   cvtpi2pd	(%rax), %xmm2
-# CHECK-NEXT:  1      6     5.00                        cvtps2dq	%xmm0, %xmm2
-# CHECK-NEXT:  1      7     6.00    *                   cvtps2dq	(%rax), %xmm2
-# CHECK-NEXT:  1      7     6.00                        cvtps2pd	%xmm0, %xmm2
-# CHECK-NEXT:  1      8     7.00    *                   cvtps2pd	(%rax), %xmm2
-# CHECK-NEXT:  1      8     7.00                        cvtsd2si	%xmm0, %ecx
-# CHECK-NEXT:  1      8     7.00                        cvtsd2si	%xmm0, %rcx
-# CHECK-NEXT:  1      9     6.00    *                   cvtsd2si	(%rax), %ecx
-# CHECK-NEXT:  1      9     6.00    *                   cvtsd2si	(%rax), %rcx
-# CHECK-NEXT:  1      6     5.00                        cvtsd2ss	%xmm0, %xmm2
-# CHECK-NEXT:  1      7     6.00    *                   cvtsd2ss	(%rax), %xmm2
-# CHECK-NEXT:  1      6     5.00                        cvtsi2sd	%ecx, %xmm2
-# CHECK-NEXT:  1      6     5.00                        cvtsi2sd	%rcx, %xmm2
-# CHECK-NEXT:  1      7     6.00    *                   cvtsi2sdl	(%rax), %xmm2
+# CHECK-NEXT:  3      7     6.00                        cvtdq2pd	%xmm0, %xmm2
+# CHECK-NEXT:  4      8     7.00    *                   cvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  3      6     6.00                        cvtdq2ps	%xmm0, %xmm2
+# CHECK-NEXT:  4      7     7.00    *                   cvtdq2ps	(%rax), %xmm2
+# CHECK-NEXT:  4      7     7.00                        cvtpd2dq	%xmm0, %xmm2
+# CHECK-NEXT:  5      8     8.00    *                   cvtpd2dq	(%rax), %xmm2
+# CHECK-NEXT:  4      7     7.00                        cvtpd2pi	%xmm0, %mm2
+# CHECK-NEXT:  5      8     8.00    *                   cvtpd2pi	(%rax), %mm2
+# CHECK-NEXT:  4      11    11.00                       cvtpd2ps	%xmm0, %xmm2
+# CHECK-NEXT:  5      12    12.00   *                   cvtpd2ps	(%rax), %xmm2
+# CHECK-NEXT:  3      7     6.00                        cvtpi2pd	%mm0, %xmm2
+# CHECK-NEXT:  4      8     7.00    *                   cvtpi2pd	(%rax), %xmm2
+# CHECK-NEXT:  3      6     6.00                        cvtps2dq	%xmm0, %xmm2
+# CHECK-NEXT:  4      7     7.00    *                   cvtps2dq	(%rax), %xmm2
+# CHECK-NEXT:  4      7     6.00                        cvtps2pd	%xmm0, %xmm2
+# CHECK-NEXT:  5      8     7.00    *                   cvtps2pd	(%rax), %xmm2
+# CHECK-NEXT:  3      8     8.00                        cvtsd2si	%xmm0, %ecx
+# CHECK-NEXT:  3      8     8.00                        cvtsd2si	%xmm0, %rcx
+# CHECK-NEXT:  4      9     10.00   *                   cvtsd2si	(%rax), %ecx
+# CHECK-NEXT:  4      9     10.00   *                   cvtsd2si	(%rax), %rcx
+# CHECK-NEXT:  3      10    10.00                       cvtsd2ss	%xmm0, %xmm2
+# CHECK-NEXT:  4      11    12.00   *                   cvtsd2ss	(%rax), %xmm2
+# CHECK-NEXT:  3      6     6.00                        cvtsi2sd	%ecx, %xmm2
+# CHECK-NEXT:  3      6     6.00                        cvtsi2sd	%rcx, %xmm2
+# CHECK-NEXT:  3      7     7.00    *                   cvtsi2sdl	(%rax), %xmm2
 # CHECK-NEXT:  1      7     6.00    *                   cvtsi2sdq	(%rax), %xmm2
-# CHECK-NEXT:  1      6     5.00                        cvtss2sd	%xmm0, %xmm2
-# CHECK-NEXT:  1      7     6.00    *                   cvtss2sd	(%rax), %xmm2
-# CHECK-NEXT:  1      7     6.00                        cvttpd2dq	%xmm0, %xmm2
-# CHECK-NEXT:  1      8     7.00    *                   cvttpd2dq	(%rax), %xmm2
-# CHECK-NEXT:  1      7     6.00                        cvttpd2pi	%xmm0, %mm2
-# CHECK-NEXT:  1      8     7.00    *                   cvttpd2pi	(%rax), %mm2
-# CHECK-NEXT:  1      6     5.00                        cvttps2dq	%xmm0, %xmm2
-# CHECK-NEXT:  1      7     6.00    *                   cvttps2dq	(%rax), %xmm2
-# CHECK-NEXT:  1      8     7.00                        cvttsd2si	%xmm0, %ecx
-# CHECK-NEXT:  1      8     7.00                        cvttsd2si	%xmm0, %rcx
-# CHECK-NEXT:  1      9     6.00    *                   cvttsd2si	(%rax), %ecx
-# CHECK-NEXT:  1      9     6.00    *                   cvttsd2si	(%rax), %rcx
+# CHECK-NEXT:  3      6     6.00                        cvtss2sd	%xmm0, %xmm2
+# CHECK-NEXT:  4      7     7.00    *                   cvtss2sd	(%rax), %xmm2
+# CHECK-NEXT:  4      7     7.00                        cvttpd2dq	%xmm0, %xmm2
+# CHECK-NEXT:  5      8     8.00    *                   cvttpd2dq	(%rax), %xmm2
+# CHECK-NEXT:  4      7     7.00                        cvttpd2pi	%xmm0, %mm2
+# CHECK-NEXT:  5      8     8.00    *                   cvttpd2pi	(%rax), %mm2
+# CHECK-NEXT:  3      6     6.00                        cvttps2dq	%xmm0, %xmm2
+# CHECK-NEXT:  4      7     7.00    *                   cvttps2dq	(%rax), %xmm2
+# CHECK-NEXT:  3      8     8.00                        cvttsd2si	%xmm0, %ecx
+# CHECK-NEXT:  3      8     8.00                        cvttsd2si	%xmm0, %rcx
+# CHECK-NEXT:  4      9     10.00   *                   cvttsd2si	(%rax), %ecx
+# CHECK-NEXT:  4      9     10.00   *                   cvttsd2si	(%rax), %rcx
 # CHECK-NEXT:  6      125   125.00                      divpd	%xmm0, %xmm2
 # CHECK-NEXT:  7      125   125.00  *                   divpd	(%rax), %xmm2
 # CHECK-NEXT:  3      62    62.00                       divsd	%xmm0, %xmm2
@@ -681,7 +681,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]
-# CHECK-NEXT: 1306.00 1166.00
+# CHECK-NEXT: 1366.00 1226.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    Instructions:
@@ -702,42 +702,42 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT: 10.00  10.00  comisd	(%rax), %xmm1
 # CHECK-NEXT: 6.00   6.00   cvtdq2pd	%xmm0, %xmm2
 # CHECK-NEXT: 7.00   7.00   cvtdq2pd	(%rax), %xmm2
-# CHECK-NEXT: 5.00   5.00   cvtdq2ps	%xmm0, %xmm2
-# CHECK-NEXT: 6.00   6.00   cvtdq2ps	(%rax), %xmm2
-# CHECK-NEXT: 6.00   6.00   cvtpd2dq	%xmm0, %xmm2
-# CHECK-NEXT: 7.00   7.00   cvtpd2dq	(%rax), %xmm2
-# CHECK-NEXT: 6.00   6.00   cvtpd2pi	%xmm0, %mm2
-# CHECK-NEXT: 7.00   7.00   cvtpd2pi	(%rax), %mm2
-# CHECK-NEXT: 6.00   6.00   cvtpd2ps	%xmm0, %xmm2
-# CHECK-NEXT: 7.00   7.00   cvtpd2ps	(%rax), %xmm2
+# CHECK-NEXT: 6.00   6.00   cvtdq2ps	%xmm0, %xmm2
+# CHECK-NEXT: 7.00   7.00   cvtdq2ps	(%rax), %xmm2
+# CHECK-NEXT: 7.00   7.00   cvtpd2dq	%xmm0, %xmm2
+# CHECK-NEXT: 8.00   8.00   cvtpd2dq	(%rax), %xmm2
+# CHECK-NEXT: 7.00   7.00   cvtpd2pi	%xmm0, %mm2
+# CHECK-NEXT: 8.00   8.00   cvtpd2pi	(%rax), %mm2
+# CHECK-NEXT: 11.00  11.00  cvtpd2ps	%xmm0, %xmm2
+# CHECK-NEXT: 12.00  12.00  cvtpd2ps	(%rax), %xmm2
 # CHECK-NEXT: 6.00   6.00   cvtpi2pd	%mm0, %xmm2
 # CHECK-NEXT: 7.00   7.00   cvtpi2pd	(%rax), %xmm2
-# CHECK-NEXT: 5.00   5.00   cvtps2dq	%xmm0, %xmm2
-# CHECK-NEXT: 6.00   6.00   cvtps2dq	(%rax), %xmm2
+# CHECK-NEXT: 6.00   6.00   cvtps2dq	%xmm0, %xmm2
+# CHECK-NEXT: 7.00   7.00   cvtps2dq	(%rax), %xmm2
 # CHECK-NEXT: 6.00   6.00   cvtps2pd	%xmm0, %xmm2
 # CHECK-NEXT: 7.00   7.00   cvtps2pd	(%rax), %xmm2
-# CHECK-NEXT: 7.00   7.00   cvtsd2si	%xmm0, %ecx
-# CHECK-NEXT: 7.00   7.00   cvtsd2si	%xmm0, %rcx
-# CHECK-NEXT: 6.00   6.00   cvtsd2si	(%rax), %ecx
-# CHECK-NEXT: 6.00   6.00   cvtsd2si	(%rax), %rcx
-# CHECK-NEXT: 5.00   5.00   cvtsd2ss	%xmm0, %xmm2
-# CHECK-NEXT: 6.00   6.00   cvtsd2ss	(%rax), %xmm2
-# CHECK-NEXT: 5.00   5.00   cvtsi2sd	%ecx, %xmm2
-# CHECK-NEXT: 5.00   5.00   cvtsi2sd	%rcx, %xmm2
-# CHECK-NEXT: 6.00   6.00   cvtsi2sdl	(%rax), %xmm2
+# CHECK-NEXT: 8.00   8.00   cvtsd2si	%xmm0, %ecx
+# CHECK-NEXT: 8.00   8.00   cvtsd2si	%xmm0, %rcx
+# CHECK-NEXT: 10.00  10.00  cvtsd2si	(%rax), %ecx
+# CHECK-NEXT: 10.00  10.00  cvtsd2si	(%rax), %rcx
+# CHECK-NEXT: 10.00  10.00  cvtsd2ss	%xmm0, %xmm2
+# CHECK-NEXT: 12.00  12.00  cvtsd2ss	(%rax), %xmm2
+# CHECK-NEXT: 6.00   6.00   cvtsi2sd	%ecx, %xmm2
+# CHECK-NEXT: 6.00   6.00   cvtsi2sd	%rcx, %xmm2
+# CHECK-NEXT: 7.00   7.00   cvtsi2sdl	(%rax), %xmm2
 # CHECK-NEXT: 6.00   6.00   cvtsi2sdq	(%rax), %xmm2
-# CHECK-NEXT: 5.00   5.00   cvtss2sd	%xmm0, %xmm2
-# CHECK-NEXT: 6.00   6.00   cvtss2sd	(%rax), %xmm2
-# CHECK-NEXT: 6.00   6.00   cvttpd2dq	%xmm0, %xmm2
-# CHECK-NEXT: 7.00   7.00   cvttpd2dq	(%rax), %xmm2
-# CHECK-NEXT: 6.00   6.00   cvttpd2pi	%xmm0, %mm2
-# CHECK-NEXT: 7.00   7.00   cvttpd2pi	(%rax), %mm2
-# CHECK-NEXT: 5.00   5.00   cvttps2dq	%xmm0, %xmm2
-# CHECK-NEXT: 6.00   6.00   cvttps2dq	(%rax), %xmm2
-# CHECK-NEXT: 7.00   7.00   cvttsd2si	%xmm0, %ecx
-# CHECK-NEXT: 7.00   7.00   cvttsd2si	%xmm0, %rcx
-# CHECK-NEXT: 6.00   6.00   cvttsd2si	(%rax), %ecx
-# CHECK-NEXT: 6.00   6.00   cvttsd2si	(%rax), %rcx
+# CHECK-NEXT: 6.00   6.00   cvtss2sd	%xmm0, %xmm2
+# CHECK-NEXT: 7.00   7.00   cvtss2sd	(%rax), %xmm2
+# CHECK-NEXT: 7.00   7.00   cvttpd2dq	%xmm0, %xmm2
+# CHECK-NEXT: 8.00   8.00   cvttpd2dq	(%rax), %xmm2
+# CHECK-NEXT: 7.00   7.00   cvttpd2pi	%xmm0, %mm2
+# CHECK-NEXT: 8.00   8.00   cvttpd2pi	(%rax), %mm2
+# CHECK-NEXT: 6.00   6.00   cvttps2dq	%xmm0, %xmm2
+# CHECK-NEXT: 7.00   7.00   cvttps2dq	(%rax), %xmm2
+# CHECK-NEXT: 8.00   8.00   cvttsd2si	%xmm0, %ecx
+# CHECK-NEXT: 8.00   8.00   cvttsd2si	%xmm0, %rcx
+# CHECK-NEXT: 10.00  10.00  cvttsd2si	(%rax), %ecx
+# CHECK-NEXT: 10.00  10.00  cvttsd2si	(%rax), %rcx
 # CHECK-NEXT: 125.00 125.00 divpd	%xmm0, %xmm2
 # CHECK-NEXT: 125.00 125.00 divpd	(%rax), %xmm2
 # CHECK-NEXT: 62.00  62.00  divsd	%xmm0, %xmm2


        


More information about the llvm-commits mailing list