[llvm] 65ad09d - [X86][SLM] Fix DIVPD/DIVPS/RCPPS/RSQRTPS/SQRTPD/SQRTPS/DPPD/DPPS uops, latency and throughput
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 13 00:52:24 PDT 2021
Author: Simon Pilgrim
Date: 2021-09-13T08:36:43+01:00
New Revision: 65ad09da0ea7b947ce4bfa96dee00a53230f4cf9
URL: https://github.com/llvm/llvm-project/commit/65ad09da0ea7b947ce4bfa96dee00a53230f4cf9
DIFF: https://github.com/llvm/llvm-project/commit/65ad09da0ea7b947ce4bfa96dee00a53230f4cf9.diff
LOG: [X86][SLM] Fix DIVPD/DIVPS/RCPPS/RSQRTPS/SQRTPD/SQRTPS/DPPD/DPPS uops, latency and throughput
The packed variants of these instructions had been modelled identically to the scalar variants.
The discrepancy showed up during a run of llvm-exegesis on a cheap SLM box, and the corrected numbers match what Agner / InstLatX64 report as well.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleSLM.td
llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s
llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 36d0cadf62962..b0a2c75801c74 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -233,33 +233,33 @@ defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
-defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
-defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
+defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39], 6, 1>;
+defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
-defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
-defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
+defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69], 6, 1>;
+defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
-defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
-defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20]>;
-defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40]>;
-defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40]>;
+defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0, SLMFPDivider], 20, [1,20]>;
+defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0, SLMFPDivider], 41, [1,40], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
-defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35]>;
-defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70]>;
-defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70]>;
+defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0, SLMFPDivider], 35, [1,35]>;
+defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0, SLMFPDivider], 71, [1,70], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
-defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 12, [8], 5, 1>;
+defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 15, [12], 9, 1>;
+defm : X86WriteResPairUnsupported<WriteDPPSY>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s
index c9a32714d7541..8c0023ef8d0fb 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s
@@ -226,8 +226,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %ecx
# CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %rcx
-# CHECK-NEXT: 1 39 39.00 divps %xmm0, %xmm2
-# CHECK-NEXT: 1 42 39.00 * divps (%rax), %xmm2
+# CHECK-NEXT: 6 39 39.00 divps %xmm0, %xmm2
+# CHECK-NEXT: 7 42 39.00 * divps (%rax), %xmm2
# CHECK-NEXT: 1 19 17.00 divss %xmm0, %xmm2
# CHECK-NEXT: 1 22 17.00 * divss (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 * * U ldmxcsr (%rax)
@@ -290,19 +290,19 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 7 1.00 * psadbw (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 pshufw $1, %mm0, %mm2
# CHECK-NEXT: 1 4 1.00 * pshufw $1, (%rax), %mm2
-# CHECK-NEXT: 1 5 1.00 rcpps %xmm0, %xmm2
-# CHECK-NEXT: 1 8 1.00 * rcpps (%rax), %xmm2
-# CHECK-NEXT: 1 5 1.00 rcpss %xmm0, %xmm2
-# CHECK-NEXT: 1 8 1.00 * rcpss (%rax), %xmm2
-# CHECK-NEXT: 1 5 1.00 rsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 1 8 1.00 * rsqrtps (%rax), %xmm2
-# CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2
-# CHECK-NEXT: 1 8 1.00 * rsqrtss (%rax), %xmm2
+# CHECK-NEXT: 5 9 8.00 rcpps %xmm0, %xmm2
+# CHECK-NEXT: 6 12 8.00 * rcpps (%rax), %xmm2
+# CHECK-NEXT: 1 4 1.00 rcpss %xmm0, %xmm2
+# CHECK-NEXT: 1 7 1.00 * rcpss (%rax), %xmm2
+# CHECK-NEXT: 5 9 8.00 rsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 6 12 8.00 * rsqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 4 1.00 rsqrtss %xmm0, %xmm2
+# CHECK-NEXT: 1 7 1.00 * rsqrtss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * * U sfence
# CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * shufps $1, (%rax), %xmm2
-# CHECK-NEXT: 1 41 40.00 sqrtps %xmm0, %xmm2
-# CHECK-NEXT: 1 44 40.00 * sqrtps (%rax), %xmm2
+# CHECK-NEXT: 5 41 40.00 sqrtps %xmm0, %xmm2
+# CHECK-NEXT: 6 44 40.00 * sqrtps (%rax), %xmm2
# CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2
# CHECK-NEXT: 1 23 20.00 * sqrtss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * U stmxcsr (%rax)
@@ -331,7 +331,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - 232.00 8.00 80.00 37.00 0.50 0.50 67.00
+# CHECK-NEXT: - 232.00 8.00 108.00 37.00 0.50 0.50 67.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -431,12 +431,12 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 psadbw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pshufw $1, %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pshufw $1, (%rax), %mm2
-# CHECK-NEXT: - - - 1.00 - - - - rcpps %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 rcpps (%rax), %xmm2
+# CHECK-NEXT: - - - 8.00 - - - - rcpps %xmm0, %xmm2
+# CHECK-NEXT: - - - 8.00 - - - 1.00 rcpps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - rcpss %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 rcpss (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - - - - rsqrtps %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 rsqrtps (%rax), %xmm2
+# CHECK-NEXT: - - - 8.00 - - - - rsqrtps %xmm0, %xmm2
+# CHECK-NEXT: - - - 8.00 - - - 1.00 rsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - rsqrtss %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 rsqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 sfence
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
index e00613e7f82b0..6c88d2bdc977f 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
@@ -460,8 +460,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cvttsd2si %xmm0, %rcx
# CHECK-NEXT: 1 8 1.00 * cvttsd2si (%rax), %ecx
# CHECK-NEXT: 1 8 1.00 * cvttsd2si (%rax), %rcx
-# CHECK-NEXT: 1 69 69.00 divpd %xmm0, %xmm2
-# CHECK-NEXT: 1 72 69.00 * divpd (%rax), %xmm2
+# CHECK-NEXT: 6 69 69.00 divpd %xmm0, %xmm2
+# CHECK-NEXT: 7 72 69.00 * divpd (%rax), %xmm2
# CHECK-NEXT: 1 34 32.00 divsd %xmm0, %xmm2
# CHECK-NEXT: 1 37 32.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * * U lfence
@@ -658,8 +658,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 * pxor (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 shufpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * shufpd $1, (%rax), %xmm2
-# CHECK-NEXT: 1 71 70.00 sqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 1 74 70.00 * sqrtpd (%rax), %xmm2
+# CHECK-NEXT: 5 71 70.00 sqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 6 74 70.00 * sqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 35 35.00 sqrtsd %xmm0, %xmm2
# CHECK-NEXT: 1 38 35.00 * sqrtsd (%rax), %xmm2
# CHECK-NEXT: 1 4 2.00 subpd %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
index 6fb8d19d1fa97..3779e5a7895bc 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
@@ -163,10 +163,10 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 3 7 4.00 * blendvpd %xmm0, (%rax), %xmm2
# CHECK-NEXT: 2 4 4.00 blendvps %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 7 4.00 * blendvps %xmm0, (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 dppd $22, %xmm0, %xmm2
-# CHECK-NEXT: 1 6 1.00 * dppd $22, (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: 1 6 1.00 * dpps $22, (%rax), %xmm2
+# CHECK-NEXT: 5 12 8.00 dppd $22, %xmm0, %xmm2
+# CHECK-NEXT: 6 15 8.00 * dppd $22, (%rax), %xmm2
+# CHECK-NEXT: 9 15 12.00 dpps $22, %xmm0, %xmm2
+# CHECK-NEXT: 10 18 12.00 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 extractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 4 2.00 * extractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
@@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - - - 104.00 25.00 - - 54.00
+# CHECK-NEXT: - - - 104.00 61.00 - - 54.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -276,10 +276,10 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvpd %xmm0, (%rax), %xmm2
# CHECK-NEXT: - - - 4.00 - - - - blendvps %xmm0, %xmm0, %xmm2
# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvps %xmm0, (%rax), %xmm2
-# CHECK-NEXT: - - - - 1.00 - - - dppd $22, %xmm0, %xmm2
-# CHECK-NEXT: - - - - 1.00 - - 1.00 dppd $22, (%rax), %xmm2
-# CHECK-NEXT: - - - - 1.00 - - - dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: - - - - 1.00 - - 1.00 dpps $22, (%rax), %xmm2
+# CHECK-NEXT: - - - - 8.00 - - - dppd $22, %xmm0, %xmm2
+# CHECK-NEXT: - - - - 8.00 - - 1.00 dppd $22, (%rax), %xmm2
+# CHECK-NEXT: - - - - 12.00 - - - dpps $22, %xmm0, %xmm2
+# CHECK-NEXT: - - - - 12.00 - - 1.00 dpps $22, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - extractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - 1.00 - - - 2.00 extractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 - - - - insertps $1, %xmm0, %xmm2
More information about the llvm-commits mailing list