[llvm] 30498cf - [X86] SkylakeClientModel - conversion instructions don't use Port015
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 10 04:43:49 PST 2022
Author: Simon Pilgrim
Date: 2022-11-10T12:42:51Z
New Revision: 30498cf7c46f90db0f67ff01f0246860e55be0f2
URL: https://github.com/llvm/llvm-project/commit/30498cf7c46f90db0f67ff01f0246860e55be0f2
DIFF: https://github.com/llvm/llvm-project/commit/30498cf7c46f90db0f67ff01f0246860e55be0f2.diff
LOG: [X86] SkylakeClientModel - conversion instructions don't use Port015
Fixes a lot of throughput mismatches - the more complicated conversion instructions use SKLPort5+SKLPort01, not SKLPort5+SKLPort015 (SKLPort015 is mainly used for basic logic and blend ops).
Fixing this should allow us to remove a lot of unnecessary scheduler overrides from SkylakeClientModel.
Confirmed by both Agner Fog's instruction tables and uops.info.
Added:
Modified:
llvm/lib/Target/X86/X86SchedSkylakeClient.td
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index efd9fb55332cf..42b81eb9005ee 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -469,14 +469,14 @@ defm : SKLWriteResPair<WriteCvtPD2PS, [SKLPort5,SKLPort01], 5, [1,1], 2, 6>;
defm : SKLWriteResPair<WriteCvtPD2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2, 6>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
-defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPH2PSLd, [SKLPort23,SKLPort01], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
-defm : X86WriteRes<WriteCvtPS2PH, [SKLPort5,SKLPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PH, [SKLPort5,SKLPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 6, [1,1,1,1], 4>;
@@ -928,7 +928,7 @@ def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
-def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> {
+def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
index f28cd83cf8d83..4f7227b58ad5f 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 126.00 333.42 202.42 173.17 173.17 34.00 324.92 5.25 12.67
+# CHECK-NEXT: - 126.00 334.58 203.58 173.17 173.17 34.00 322.58 5.25 12.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1832,7 +1832,7 @@ vzeroupper
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
@@ -1844,7 +1844,7 @@ vzeroupper
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2pd %xmm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2
@@ -1854,21 +1854,21 @@ vzeroupper
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsd2ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtsd2ss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %ecx, %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %rcx, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %ecx, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %rcx, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdl (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdq (%rax), %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2ss %ecx, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2ss %ecx, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssl (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssq (%rax), %xmm0, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtss2sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtss2sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvtss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %ecx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %rcx
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s
index 648c7b818a195..03325f2ec2f3a 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s
@@ -45,15 +45,15 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 3.67 3.67 1.67 1.67 2.00 6.67 - 0.67
+# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %ymm0, (%rax)
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
index 1c77b2962818f..007eb96400f83 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
@@ -333,7 +333,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 24.00 71.33 24.33 32.00 32.00 8.00 32.83 0.50 3.00
+# CHECK-NEXT: - 24.00 71.83 24.83 32.00 32.00 8.00 31.83 0.50 3.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -353,9 +353,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comiss (%rax), %xmm1
# CHECK-NEXT: - - 2.00 - - - - - - - cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pi %xmm0, %mm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2pi (%rax), %mm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2ss %ecx, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2ss %ecx, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - cvtsi2ss %rcx, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssq (%rax), %xmm2
@@ -363,7 +363,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %ecx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %rcx
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttps2pi %xmm0, %mm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttps2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2pi (%rax), %mm2
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvttss2si %xmm0, %rcx
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
index 082346c542b47..31d260b1bd67a 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
@@ -689,7 +689,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 40.00 111.25 79.25 63.50 63.50 14.00 96.25 2.25 5.00
+# CHECK-NEXT: - 40.00 112.58 80.58 63.50 63.50 14.00 93.58 2.25 5.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -712,9 +712,9 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2dq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2pi %xmm0, %mm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2pi (%rax), %mm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2
@@ -722,7 +722,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtsd2si %xmm0, %rcx
@@ -730,15 +730,15 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtsd2si (%rax), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsd2ss %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtsd2ss (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %ecx, %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %rcx, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2sd %ecx, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2sd %rcx, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtss2sd %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtss2sd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtss2sd (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvttpd2dq (%rax), %xmm2
-# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2pi %xmm0, %mm2
+# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttpd2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvttpd2pi (%rax), %mm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2dq (%rax), %xmm2
More information about the llvm-commits mailing list.