[llvm] 9ad5969 - [X86][Atom] Fix CVT uops + port usage
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 12 14:59:53 PST 2021
Author: Simon Pilgrim
Date: 2021-12-12T22:57:53Z
New Revision: 9ad5969b5e07e24d14de575533efccfc4162e043
URL: https://github.com/llvm/llvm-project/commit/9ad5969b5e07e24d14de575533efccfc4162e043
DIFF: https://github.com/llvm/llvm-project/commit/9ad5969b5e07e24d14de575533efccfc4162e043.diff
LOG: [X86][Atom] Fix CVT uops + port usage
Fix the overrides to use both ports. Update the uop counts and port usage based on the most recent llvm-exegesis captures (PR36895) and on what the Intel AoM and Agner report.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleAtom.td
llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index 292dbbb108e8e..0fedfc01092c8 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -320,30 +320,30 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
// Conversions.
////////////////////////////////////////////////////////////////////////////////
-defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 8, 9, [7,7], [6,6]>;
-defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
+defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 8, 9, [8,8], [9,9], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 3, 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
-defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 8, 9, [7,7], [6,6]>;
-defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7]>;
+defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 8, 9, [8,8],[10,10], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [7,7], [8,8], 4, 5>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
-defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
-defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
+defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [6,6], 3, 1>;
+defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 3, 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
-defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
-defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7]>;
+defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 3, 3>;
+defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7], 3, 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
-defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
-defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7]>;
+defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7], 4, 5>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
-defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
-defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7]>;
+defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 10, 11,[10,10],[12,12], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 11, 12,[11,11],[12,12], 4, 5>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
@@ -550,6 +550,40 @@ def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIrm, MMX_CVTTPS2PIrm)>;
def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
+def AtomWrite0_1_7 : SchedWriteRes<[AtomPort0,AtomPort1]> { // Write class that occupies both AtomPort0 and AtomPort1.
+ let Latency = 7;
+ let ResourceCycles = [6,6]; // One entry per listed port: 6 cycles on each.
+} // NOTE(review): NumMicroOps is not set here, unlike the other AtomWrite0_1_* classes added in this hunk; the llvm-mca expectation in this patch shows 1 uop for `cvtsi2sdq (%rax)` - confirm the default is intended.
+def : InstRW<[AtomWrite0_1_7], (instregex "CVTSI642SDrm(_Int)?")>; // Applies the class to instructions matching CVTSI642SDrm, with or without the _Int suffix.
+
+def AtomWrite0_1_7_4 : SchedWriteRes<[AtomPort0,AtomPort1]> { // Both-port write class: latency 7, 4 micro-ops.
+ let Latency = 7;
+ let ResourceCycles = [8,8]; // 8 cycles on each of the two listed ports.
+ let NumMicroOps = 4;
+}
+def : InstRW<[AtomWrite0_1_7_4], (instregex "CVTSI642SSrr(_Int)?")>; // Applies the class to instructions matching CVTSI642SSrr, with or without the _Int suffix.
+
+def AtomWrite0_1_8_4 : SchedWriteRes<[AtomPort0,AtomPort1]> { // Both-port write class: latency 8, 4 micro-ops.
+ let Latency = 8;
+ let ResourceCycles = [8,8]; // 8 cycles on each of the two listed ports.
+ let NumMicroOps = 4;
+}
+// Bind the rm form to the 8-cycle class defined directly above; it was previously
+// bound to AtomWrite0_1_7_4 (the rr class), which left AtomWrite0_1_8_4 unused.
+// NOTE(review): the llvm-mca CHECK line for `cvtsi2ssq (%rax)` in resources-sse1.s
+// was generated with the old binding (latency 7) and must be regenerated.
+def : InstRW<[AtomWrite0_1_8_4], (instregex "CVTSI642SSrm(_Int)?")>;
+
+def AtomWrite0_1_9 : SchedWriteRes<[AtomPort0,AtomPort1]> { // Both-port write class: latency 9, 4 micro-ops.
+ let Latency = 9;
+ let ResourceCycles = [9,9]; // 9 cycles on each of the two listed ports.
+ let NumMicroOps = 4;
+}
+def : InstRW<[AtomWrite0_1_9], (instregex "CVT(T)?SS2SI64rr(_Int)?")>; // Covers CVT and CVTT variants; this regex was previously listed under AtomWrite01_9 (removed further down in this patch).
+
+def AtomWrite0_1_10 : SchedWriteRes<[AtomPort0,AtomPort1]> { // Both-port write class: latency 10, 5 micro-ops.
+ let Latency = 10;
+ let ResourceCycles = [11,11]; // 11 cycles on each of the two listed ports (one more than the latency).
+ let NumMicroOps = 5;
+}
+def : InstRW<[AtomWrite0_1_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>; // Covers CVT and CVTT variants; this regex was previously bound to AtomWrite01_10 (removed further down in this patch).
+
// Port0 or Port1
def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
let Latency = 1;
@@ -647,15 +681,13 @@ def : InstRW<[AtomWrite01_9], (instrs POPA16, POPA32,
SHLD64mri8, SHRD64mri8,
SHLD64rri8, SHRD64rri8,
CMPXCHG8rr)>;
-def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
- "CVT(T)?SS2SI64rr(_Int)?")>;
+def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F")>;
def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
let Latency = 10;
let ResourceCycles = [10];
}
def : SchedAlias<WriteFLDC, AtomWrite01_10>;
-def : InstRW<[AtomWrite01_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>;
def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let Latency = 11;
diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
index 97cee7d01bffe..25011cd1aea89 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
@@ -212,20 +212,20 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 * cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 cvtps2pi %xmm0, %mm2
# CHECK-NEXT: 1 5 5.00 * cvtps2pi (%rax), %mm2
-# CHECK-NEXT: 1 6 5.00 cvtsi2ss %ecx, %xmm2
-# CHECK-NEXT: 1 6 5.00 cvtsi2ss %rcx, %xmm2
+# CHECK-NEXT: 3 6 6.00 cvtsi2ss %ecx, %xmm2
+# CHECK-NEXT: 4 7 8.00 cvtsi2ss %rcx, %xmm2
# CHECK-NEXT: 1 7 6.00 * cvtsi2ssl (%rax), %xmm2
-# CHECK-NEXT: 1 7 6.00 * cvtsi2ssq (%rax), %xmm2
-# CHECK-NEXT: 1 8 7.00 cvtss2si %xmm0, %ecx
-# CHECK-NEXT: 1 9 4.50 cvtss2si %xmm0, %rcx
-# CHECK-NEXT: 1 9 6.00 * cvtss2si (%rax), %ecx
-# CHECK-NEXT: 1 10 5.00 * cvtss2si (%rax), %rcx
+# CHECK-NEXT: 4 7 8.00 * cvtsi2ssq (%rax), %xmm2
+# CHECK-NEXT: 3 8 8.00 cvtss2si %xmm0, %ecx
+# CHECK-NEXT: 4 9 9.00 cvtss2si %xmm0, %rcx
+# CHECK-NEXT: 4 9 9.00 * cvtss2si (%rax), %ecx
+# CHECK-NEXT: 5 10 11.00 * cvtss2si (%rax), %rcx
# CHECK-NEXT: 1 5 5.00 cvttps2pi %xmm0, %mm2
# CHECK-NEXT: 1 5 5.00 * cvttps2pi (%rax), %mm2
-# CHECK-NEXT: 1 8 7.00 cvttss2si %xmm0, %ecx
-# CHECK-NEXT: 1 9 4.50 cvttss2si %xmm0, %rcx
-# CHECK-NEXT: 1 9 6.00 * cvttss2si (%rax), %ecx
-# CHECK-NEXT: 1 10 5.00 * cvttss2si (%rax), %rcx
+# CHECK-NEXT: 3 8 8.00 cvttss2si %xmm0, %ecx
+# CHECK-NEXT: 4 9 9.00 cvttss2si %xmm0, %rcx
+# CHECK-NEXT: 4 9 9.00 * cvttss2si (%rax), %ecx
+# CHECK-NEXT: 5 10 11.00 * cvttss2si (%rax), %rcx
# CHECK-NEXT: 6 70 70.00 divps %xmm0, %xmm2
# CHECK-NEXT: 7 70 70.00 * divps (%rax), %xmm2
# CHECK-NEXT: 3 34 34.00 divss %xmm0, %xmm2
@@ -325,7 +325,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
-# CHECK-NEXT: 712.00 667.00
+# CHECK-NEXT: 747.00 702.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
@@ -347,20 +347,20 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - 5.00 cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: - 5.00 cvtps2pi %xmm0, %mm2
# CHECK-NEXT: 5.00 5.00 cvtps2pi (%rax), %mm2
-# CHECK-NEXT: 5.00 5.00 cvtsi2ss %ecx, %xmm2
-# CHECK-NEXT: 5.00 5.00 cvtsi2ss %rcx, %xmm2
+# CHECK-NEXT: 6.00 6.00 cvtsi2ss %ecx, %xmm2
+# CHECK-NEXT: 8.00 8.00 cvtsi2ss %rcx, %xmm2
# CHECK-NEXT: 6.00 6.00 cvtsi2ssl (%rax), %xmm2
-# CHECK-NEXT: 6.00 6.00 cvtsi2ssq (%rax), %xmm2
-# CHECK-NEXT: 7.00 7.00 cvtss2si %xmm0, %ecx
-# CHECK-NEXT: 4.50 4.50 cvtss2si %xmm0, %rcx
-# CHECK-NEXT: 6.00 6.00 cvtss2si (%rax), %ecx
-# CHECK-NEXT: 5.00 5.00 cvtss2si (%rax), %rcx
+# CHECK-NEXT: 8.00 8.00 cvtsi2ssq (%rax), %xmm2
+# CHECK-NEXT: 8.00 8.00 cvtss2si %xmm0, %ecx
+# CHECK-NEXT: 9.00 9.00 cvtss2si %xmm0, %rcx
+# CHECK-NEXT: 9.00 9.00 cvtss2si (%rax), %ecx
+# CHECK-NEXT: 11.00 11.00 cvtss2si (%rax), %rcx
# CHECK-NEXT: - 5.00 cvttps2pi %xmm0, %mm2
# CHECK-NEXT: 5.00 5.00 cvttps2pi (%rax), %mm2
-# CHECK-NEXT: 7.00 7.00 cvttss2si %xmm0, %ecx
-# CHECK-NEXT: 4.50 4.50 cvttss2si %xmm0, %rcx
-# CHECK-NEXT: 6.00 6.00 cvttss2si (%rax), %ecx
-# CHECK-NEXT: 5.00 5.00 cvttss2si (%rax), %rcx
+# CHECK-NEXT: 8.00 8.00 cvttss2si %xmm0, %ecx
+# CHECK-NEXT: 9.00 9.00 cvttss2si %xmm0, %rcx
+# CHECK-NEXT: 9.00 9.00 cvttss2si (%rax), %ecx
+# CHECK-NEXT: 11.00 11.00 cvttss2si (%rax), %rcx
# CHECK-NEXT: 70.00 70.00 divps %xmm0, %xmm2
# CHECK-NEXT: 70.00 70.00 divps (%rax), %xmm2
# CHECK-NEXT: 34.00 34.00 divss %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
index a9d0c81c99d6a..82d40dca00f7c 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
@@ -422,44 +422,44 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 * cmpeqsd (%rax), %xmm2
# CHECK-NEXT: 4 9 9.00 comisd %xmm0, %xmm1
# CHECK-NEXT: 5 10 10.00 * comisd (%rax), %xmm1
-# CHECK-NEXT: 1 7 6.00 cvtdq2pd %xmm0, %xmm2
-# CHECK-NEXT: 1 8 7.00 * cvtdq2pd (%rax), %xmm2
-# CHECK-NEXT: 1 6 5.00 cvtdq2ps %xmm0, %xmm2
-# CHECK-NEXT: 1 7 6.00 * cvtdq2ps (%rax), %xmm2
-# CHECK-NEXT: 1 7 6.00 cvtpd2dq %xmm0, %xmm2
-# CHECK-NEXT: 1 8 7.00 * cvtpd2dq (%rax), %xmm2
-# CHECK-NEXT: 1 7 6.00 cvtpd2pi %xmm0, %mm2
-# CHECK-NEXT: 1 8 7.00 * cvtpd2pi (%rax), %mm2
-# CHECK-NEXT: 1 7 6.00 cvtpd2ps %xmm0, %xmm2
-# CHECK-NEXT: 1 8 7.00 * cvtpd2ps (%rax), %xmm2
-# CHECK-NEXT: 1 7 6.00 cvtpi2pd %mm0, %xmm2
-# CHECK-NEXT: 1 8 7.00 * cvtpi2pd (%rax), %xmm2
-# CHECK-NEXT: 1 6 5.00 cvtps2dq %xmm0, %xmm2
-# CHECK-NEXT: 1 7 6.00 * cvtps2dq (%rax), %xmm2
-# CHECK-NEXT: 1 7 6.00 cvtps2pd %xmm0, %xmm2
-# CHECK-NEXT: 1 8 7.00 * cvtps2pd (%rax), %xmm2
-# CHECK-NEXT: 1 8 7.00 cvtsd2si %xmm0, %ecx
-# CHECK-NEXT: 1 8 7.00 cvtsd2si %xmm0, %rcx
-# CHECK-NEXT: 1 9 6.00 * cvtsd2si (%rax), %ecx
-# CHECK-NEXT: 1 9 6.00 * cvtsd2si (%rax), %rcx
-# CHECK-NEXT: 1 6 5.00 cvtsd2ss %xmm0, %xmm2
-# CHECK-NEXT: 1 7 6.00 * cvtsd2ss (%rax), %xmm2
-# CHECK-NEXT: 1 6 5.00 cvtsi2sd %ecx, %xmm2
-# CHECK-NEXT: 1 6 5.00 cvtsi2sd %rcx, %xmm2
-# CHECK-NEXT: 1 7 6.00 * cvtsi2sdl (%rax), %xmm2
+# CHECK-NEXT: 3 7 6.00 cvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT: 4 8 7.00 * cvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: 3 6 6.00 cvtdq2ps %xmm0, %xmm2
+# CHECK-NEXT: 4 7 7.00 * cvtdq2ps (%rax), %xmm2
+# CHECK-NEXT: 4 7 7.00 cvtpd2dq %xmm0, %xmm2
+# CHECK-NEXT: 5 8 8.00 * cvtpd2dq (%rax), %xmm2
+# CHECK-NEXT: 4 7 7.00 cvtpd2pi %xmm0, %mm2
+# CHECK-NEXT: 5 8 8.00 * cvtpd2pi (%rax), %mm2
+# CHECK-NEXT: 4 11 11.00 cvtpd2ps %xmm0, %xmm2
+# CHECK-NEXT: 5 12 12.00 * cvtpd2ps (%rax), %xmm2
+# CHECK-NEXT: 3 7 6.00 cvtpi2pd %mm0, %xmm2
+# CHECK-NEXT: 4 8 7.00 * cvtpi2pd (%rax), %xmm2
+# CHECK-NEXT: 3 6 6.00 cvtps2dq %xmm0, %xmm2
+# CHECK-NEXT: 4 7 7.00 * cvtps2dq (%rax), %xmm2
+# CHECK-NEXT: 4 7 6.00 cvtps2pd %xmm0, %xmm2
+# CHECK-NEXT: 5 8 7.00 * cvtps2pd (%rax), %xmm2
+# CHECK-NEXT: 3 8 8.00 cvtsd2si %xmm0, %ecx
+# CHECK-NEXT: 3 8 8.00 cvtsd2si %xmm0, %rcx
+# CHECK-NEXT: 4 9 10.00 * cvtsd2si (%rax), %ecx
+# CHECK-NEXT: 4 9 10.00 * cvtsd2si (%rax), %rcx
+# CHECK-NEXT: 3 10 10.00 cvtsd2ss %xmm0, %xmm2
+# CHECK-NEXT: 4 11 12.00 * cvtsd2ss (%rax), %xmm2
+# CHECK-NEXT: 3 6 6.00 cvtsi2sd %ecx, %xmm2
+# CHECK-NEXT: 3 6 6.00 cvtsi2sd %rcx, %xmm2
+# CHECK-NEXT: 3 7 7.00 * cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: 1 7 6.00 * cvtsi2sdq (%rax), %xmm2
-# CHECK-NEXT: 1 6 5.00 cvtss2sd %xmm0, %xmm2
-# CHECK-NEXT: 1 7 6.00 * cvtss2sd (%rax), %xmm2
-# CHECK-NEXT: 1 7 6.00 cvttpd2dq %xmm0, %xmm2
-# CHECK-NEXT: 1 8 7.00 * cvttpd2dq (%rax), %xmm2
-# CHECK-NEXT: 1 7 6.00 cvttpd2pi %xmm0, %mm2
-# CHECK-NEXT: 1 8 7.00 * cvttpd2pi (%rax), %mm2
-# CHECK-NEXT: 1 6 5.00 cvttps2dq %xmm0, %xmm2
-# CHECK-NEXT: 1 7 6.00 * cvttps2dq (%rax), %xmm2
-# CHECK-NEXT: 1 8 7.00 cvttsd2si %xmm0, %ecx
-# CHECK-NEXT: 1 8 7.00 cvttsd2si %xmm0, %rcx
-# CHECK-NEXT: 1 9 6.00 * cvttsd2si (%rax), %ecx
-# CHECK-NEXT: 1 9 6.00 * cvttsd2si (%rax), %rcx
+# CHECK-NEXT: 3 6 6.00 cvtss2sd %xmm0, %xmm2
+# CHECK-NEXT: 4 7 7.00 * cvtss2sd (%rax), %xmm2
+# CHECK-NEXT: 4 7 7.00 cvttpd2dq %xmm0, %xmm2
+# CHECK-NEXT: 5 8 8.00 * cvttpd2dq (%rax), %xmm2
+# CHECK-NEXT: 4 7 7.00 cvttpd2pi %xmm0, %mm2
+# CHECK-NEXT: 5 8 8.00 * cvttpd2pi (%rax), %mm2
+# CHECK-NEXT: 3 6 6.00 cvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: 4 7 7.00 * cvttps2dq (%rax), %xmm2
+# CHECK-NEXT: 3 8 8.00 cvttsd2si %xmm0, %ecx
+# CHECK-NEXT: 3 8 8.00 cvttsd2si %xmm0, %rcx
+# CHECK-NEXT: 4 9 10.00 * cvttsd2si (%rax), %ecx
+# CHECK-NEXT: 4 9 10.00 * cvttsd2si (%rax), %rcx
# CHECK-NEXT: 6 125 125.00 divpd %xmm0, %xmm2
# CHECK-NEXT: 7 125 125.00 * divpd (%rax), %xmm2
# CHECK-NEXT: 3 62 62.00 divsd %xmm0, %xmm2
@@ -681,7 +681,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
-# CHECK-NEXT: 1306.00 1166.00
+# CHECK-NEXT: 1366.00 1226.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
@@ -702,42 +702,42 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 10.00 10.00 comisd (%rax), %xmm1
# CHECK-NEXT: 6.00 6.00 cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 7.00 7.00 cvtdq2pd (%rax), %xmm2
-# CHECK-NEXT: 5.00 5.00 cvtdq2ps %xmm0, %xmm2
-# CHECK-NEXT: 6.00 6.00 cvtdq2ps (%rax), %xmm2
-# CHECK-NEXT: 6.00 6.00 cvtpd2dq %xmm0, %xmm2
-# CHECK-NEXT: 7.00 7.00 cvtpd2dq (%rax), %xmm2
-# CHECK-NEXT: 6.00 6.00 cvtpd2pi %xmm0, %mm2
-# CHECK-NEXT: 7.00 7.00 cvtpd2pi (%rax), %mm2
-# CHECK-NEXT: 6.00 6.00 cvtpd2ps %xmm0, %xmm2
-# CHECK-NEXT: 7.00 7.00 cvtpd2ps (%rax), %xmm2
+# CHECK-NEXT: 6.00 6.00 cvtdq2ps %xmm0, %xmm2
+# CHECK-NEXT: 7.00 7.00 cvtdq2ps (%rax), %xmm2
+# CHECK-NEXT: 7.00 7.00 cvtpd2dq %xmm0, %xmm2
+# CHECK-NEXT: 8.00 8.00 cvtpd2dq (%rax), %xmm2
+# CHECK-NEXT: 7.00 7.00 cvtpd2pi %xmm0, %mm2
+# CHECK-NEXT: 8.00 8.00 cvtpd2pi (%rax), %mm2
+# CHECK-NEXT: 11.00 11.00 cvtpd2ps %xmm0, %xmm2
+# CHECK-NEXT: 12.00 12.00 cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: 6.00 6.00 cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: 7.00 7.00 cvtpi2pd (%rax), %xmm2
-# CHECK-NEXT: 5.00 5.00 cvtps2dq %xmm0, %xmm2
-# CHECK-NEXT: 6.00 6.00 cvtps2dq (%rax), %xmm2
+# CHECK-NEXT: 6.00 6.00 cvtps2dq %xmm0, %xmm2
+# CHECK-NEXT: 7.00 7.00 cvtps2dq (%rax), %xmm2
# CHECK-NEXT: 6.00 6.00 cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 7.00 7.00 cvtps2pd (%rax), %xmm2
-# CHECK-NEXT: 7.00 7.00 cvtsd2si %xmm0, %ecx
-# CHECK-NEXT: 7.00 7.00 cvtsd2si %xmm0, %rcx
-# CHECK-NEXT: 6.00 6.00 cvtsd2si (%rax), %ecx
-# CHECK-NEXT: 6.00 6.00 cvtsd2si (%rax), %rcx
-# CHECK-NEXT: 5.00 5.00 cvtsd2ss %xmm0, %xmm2
-# CHECK-NEXT: 6.00 6.00 cvtsd2ss (%rax), %xmm2
-# CHECK-NEXT: 5.00 5.00 cvtsi2sd %ecx, %xmm2
-# CHECK-NEXT: 5.00 5.00 cvtsi2sd %rcx, %xmm2
-# CHECK-NEXT: 6.00 6.00 cvtsi2sdl (%rax), %xmm2
+# CHECK-NEXT: 8.00 8.00 cvtsd2si %xmm0, %ecx
+# CHECK-NEXT: 8.00 8.00 cvtsd2si %xmm0, %rcx
+# CHECK-NEXT: 10.00 10.00 cvtsd2si (%rax), %ecx
+# CHECK-NEXT: 10.00 10.00 cvtsd2si (%rax), %rcx
+# CHECK-NEXT: 10.00 10.00 cvtsd2ss %xmm0, %xmm2
+# CHECK-NEXT: 12.00 12.00 cvtsd2ss (%rax), %xmm2
+# CHECK-NEXT: 6.00 6.00 cvtsi2sd %ecx, %xmm2
+# CHECK-NEXT: 6.00 6.00 cvtsi2sd %rcx, %xmm2
+# CHECK-NEXT: 7.00 7.00 cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: 6.00 6.00 cvtsi2sdq (%rax), %xmm2
-# CHECK-NEXT: 5.00 5.00 cvtss2sd %xmm0, %xmm2
-# CHECK-NEXT: 6.00 6.00 cvtss2sd (%rax), %xmm2
-# CHECK-NEXT: 6.00 6.00 cvttpd2dq %xmm0, %xmm2
-# CHECK-NEXT: 7.00 7.00 cvttpd2dq (%rax), %xmm2
-# CHECK-NEXT: 6.00 6.00 cvttpd2pi %xmm0, %mm2
-# CHECK-NEXT: 7.00 7.00 cvttpd2pi (%rax), %mm2
-# CHECK-NEXT: 5.00 5.00 cvttps2dq %xmm0, %xmm2
-# CHECK-NEXT: 6.00 6.00 cvttps2dq (%rax), %xmm2
-# CHECK-NEXT: 7.00 7.00 cvttsd2si %xmm0, %ecx
-# CHECK-NEXT: 7.00 7.00 cvttsd2si %xmm0, %rcx
-# CHECK-NEXT: 6.00 6.00 cvttsd2si (%rax), %ecx
-# CHECK-NEXT: 6.00 6.00 cvttsd2si (%rax), %rcx
+# CHECK-NEXT: 6.00 6.00 cvtss2sd %xmm0, %xmm2
+# CHECK-NEXT: 7.00 7.00 cvtss2sd (%rax), %xmm2
+# CHECK-NEXT: 7.00 7.00 cvttpd2dq %xmm0, %xmm2
+# CHECK-NEXT: 8.00 8.00 cvttpd2dq (%rax), %xmm2
+# CHECK-NEXT: 7.00 7.00 cvttpd2pi %xmm0, %mm2
+# CHECK-NEXT: 8.00 8.00 cvttpd2pi (%rax), %mm2
+# CHECK-NEXT: 6.00 6.00 cvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: 7.00 7.00 cvttps2dq (%rax), %xmm2
+# CHECK-NEXT: 8.00 8.00 cvttsd2si %xmm0, %ecx
+# CHECK-NEXT: 8.00 8.00 cvttsd2si %xmm0, %rcx
+# CHECK-NEXT: 10.00 10.00 cvttsd2si (%rax), %ecx
+# CHECK-NEXT: 10.00 10.00 cvttsd2si (%rax), %rcx
# CHECK-NEXT: 125.00 125.00 divpd %xmm0, %xmm2
# CHECK-NEXT: 125.00 125.00 divpd (%rax), %xmm2
# CHECK-NEXT: 62.00 62.00 divsd %xmm0, %xmm2
More information about the llvm-commits
mailing list