[llvm] e381d8b - [X86][Atom] Fix (U)COMISS/SD uops, latency and throughput
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 19 05:02:42 PDT 2021
Author: Simon Pilgrim
Date: 2021-09-19T12:44:44+01:00
New Revision: e381d8b24329cae6408205f74d0d6d9eaa6b29cf
URL: https://github.com/llvm/llvm-project/commit/e381d8b24329cae6408205f74d0d6d9eaa6b29cf
DIFF: https://github.com/llvm/llvm-project/commit/e381d8b24329cae6408205f74d0d6d9eaa6b29cf.diff
LOG: [X86][Atom] Fix (U)COMISS/SD uops, latency and throughput
Both ports are required for the reg and mem variants. We can also use the WriteFComX class directly and remove the unnecessary InstRW overrides. This matches what Intel AoM / Agner / InstLatX64 report as well.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleAtom.td
llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index e9020f54f2227..4c6a9397ea1fa 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -249,13 +249,13 @@ defm : AtomWriteResPair<WriteFCmp64X, [AtomPort0,AtomPort1], [AtomPort0,A
defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
-defm : AtomWriteResPair<WriteFComX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFComX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9],[10,10], 4, 5>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [2], [2]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [2], [2]>;
defm : X86WriteResPairUnsupported<WriteFMulY>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [2], [2]>;
-defm : AtomWriteResPair<WriteFMul64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9], [10,10]>;
+defm : AtomWriteResPair<WriteFMul64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9],[10,10]>;
defm : X86WriteResPairUnsupported<WriteFMul64Y>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
@@ -648,7 +648,6 @@ def : InstRW<[AtomWrite01_9], (instrs POPA16, POPA32,
SHLD64rri8, SHRD64rri8,
CMPXCHG8rr)>;
def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
- "(U)?COMIS(D|S)rr",
"CVT(T)?SS2SI64rr(_Int)?")>;
def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
@@ -656,8 +655,7 @@ def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
let ResourceCycles = [10];
}
def : SchedAlias<WriteFLDC, AtomWrite01_10>;
-def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
- "CVT(T)?SS2SI64rm(_Int)?")>;
+def : InstRW<[AtomWrite01_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>;
def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let Latency = 11;
diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
index fb6aba02155f1..a75c2bb5a86d2 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s
@@ -206,8 +206,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 7 6.00 * cmpeqps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cmpeqss %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * cmpeqss (%rax), %xmm2
-# CHECK-NEXT: 1 9 4.50 comiss %xmm0, %xmm1
-# CHECK-NEXT: 1 10 5.00 * comiss (%rax), %xmm1
+# CHECK-NEXT: 4 9 9.00 comiss %xmm0, %xmm1
+# CHECK-NEXT: 5 10 10.00 * comiss (%rax), %xmm1
# CHECK-NEXT: 1 5 5.00 cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 cvtps2pi %xmm0, %mm2
@@ -310,8 +310,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 * subps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 subss %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * subss (%rax), %xmm2
-# CHECK-NEXT: 1 9 4.50 ucomiss %xmm0, %xmm1
-# CHECK-NEXT: 1 10 5.00 * ucomiss (%rax), %xmm1
+# CHECK-NEXT: 4 9 9.00 ucomiss %xmm0, %xmm1
+# CHECK-NEXT: 5 10 10.00 * ucomiss (%rax), %xmm1
# CHECK-NEXT: 1 1 1.00 unpckhps %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * unpckhps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 unpcklps %xmm0, %xmm2
@@ -325,7 +325,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
-# CHECK-NEXT: 456.00 411.00
+# CHECK-NEXT: 475.00 430.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
@@ -341,8 +341,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 6.00 6.00 cmpeqps (%rax), %xmm2
# CHECK-NEXT: - 1.00 cmpeqss %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 cmpeqss (%rax), %xmm2
-# CHECK-NEXT: 4.50 4.50 comiss %xmm0, %xmm1
-# CHECK-NEXT: 5.00 5.00 comiss (%rax), %xmm1
+# CHECK-NEXT: 9.00 9.00 comiss %xmm0, %xmm1
+# CHECK-NEXT: 10.00 10.00 comiss (%rax), %xmm1
# CHECK-NEXT: - 5.00 cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: - 5.00 cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: - 5.00 cvtps2pi %xmm0, %mm2
@@ -445,8 +445,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1.00 1.00 subps (%rax), %xmm2
# CHECK-NEXT: - 1.00 subss %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 subss (%rax), %xmm2
-# CHECK-NEXT: 4.50 4.50 ucomiss %xmm0, %xmm1
-# CHECK-NEXT: 5.00 5.00 ucomiss (%rax), %xmm1
+# CHECK-NEXT: 9.00 9.00 ucomiss %xmm0, %xmm1
+# CHECK-NEXT: 10.00 10.00 ucomiss (%rax), %xmm1
# CHECK-NEXT: 1.00 - unpckhps %xmm0, %xmm2
# CHECK-NEXT: 1.00 - unpckhps (%rax), %xmm2
# CHECK-NEXT: 1.00 - unpcklps %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
index 2cd1d8d93c0c7..525ba77cdaab0 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
@@ -420,8 +420,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 7 6.00 * cmpeqpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cmpeqsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * cmpeqsd (%rax), %xmm2
-# CHECK-NEXT: 1 9 4.50 comisd %xmm0, %xmm1
-# CHECK-NEXT: 1 10 5.00 * comisd (%rax), %xmm1
+# CHECK-NEXT: 4 9 9.00 comisd %xmm0, %xmm1
+# CHECK-NEXT: 5 10 10.00 * comisd (%rax), %xmm1
# CHECK-NEXT: 1 7 6.00 cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 1 8 7.00 * cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 1 6 5.00 cvtdq2ps %xmm0, %xmm2
@@ -666,8 +666,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 7 6.00 * subpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 subsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * subsd (%rax), %xmm2
-# CHECK-NEXT: 1 9 4.50 ucomisd %xmm0, %xmm1
-# CHECK-NEXT: 1 10 5.00 * ucomisd (%rax), %xmm1
+# CHECK-NEXT: 4 9 9.00 ucomisd %xmm0, %xmm1
+# CHECK-NEXT: 5 10 10.00 * ucomisd (%rax), %xmm1
# CHECK-NEXT: 1 1 1.00 unpckhpd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * unpckhpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 unpcklpd %xmm0, %xmm2
@@ -681,7 +681,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
-# CHECK-NEXT: 913.00 773.00
+# CHECK-NEXT: 932.00 792.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
@@ -698,8 +698,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 6.00 6.00 cmpeqpd (%rax), %xmm2
# CHECK-NEXT: - 1.00 cmpeqsd %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 cmpeqsd (%rax), %xmm2
-# CHECK-NEXT: 4.50 4.50 comisd %xmm0, %xmm1
-# CHECK-NEXT: 5.00 5.00 comisd (%rax), %xmm1
+# CHECK-NEXT: 9.00 9.00 comisd %xmm0, %xmm1
+# CHECK-NEXT: 10.00 10.00 comisd (%rax), %xmm1
# CHECK-NEXT: 6.00 6.00 cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 7.00 7.00 cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 5.00 5.00 cvtdq2ps %xmm0, %xmm2
@@ -944,8 +944,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 6.00 6.00 subpd (%rax), %xmm2
# CHECK-NEXT: - 1.00 subsd %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 subsd (%rax), %xmm2
-# CHECK-NEXT: 4.50 4.50 ucomisd %xmm0, %xmm1
-# CHECK-NEXT: 5.00 5.00 ucomisd (%rax), %xmm1
+# CHECK-NEXT: 9.00 9.00 ucomisd %xmm0, %xmm1
+# CHECK-NEXT: 10.00 10.00 ucomisd (%rax), %xmm1
# CHECK-NEXT: 1.00 - unpckhpd %xmm0, %xmm2
# CHECK-NEXT: 1.00 - unpckhpd (%rax), %xmm2
# CHECK-NEXT: 1.00 - unpcklpd %xmm0, %xmm2
More information about the llvm-commits mailing list