[llvm] 994da65 - [X86][SLM] WriteVecIMul instructions only take 1uop
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 4 05:21:59 PDT 2021
Author: Simon Pilgrim
Date: 2021-09-04T13:21:34+01:00
New Revision: 994da657076900f5ad7fe593c3b5e5f89ab3d53d
URL: https://github.com/llvm/llvm-project/commit/994da657076900f5ad7fe593c3b5e5f89ab3d53d
DIFF: https://github.com/llvm/llvm-project/commit/994da657076900f5ad7fe593c3b5e5f89ab3d53d.diff
LOG: [X86][SLM] WriteVecIMul instructions only take 1uop
The xmm variants have half the throughput (and +1cy latency) of the mmx variants, but are still 1 uop.
I still need to do more thorough testing of SLM on test-suite before fixing the obvious bad numbers for WritePMULLD.
But this helps the D103695 helper script get to more accurate numbers for vXi32 multiplies of extended operands (i.e. we can use PMADDWD, PMULLW/PMULHW etc.). This matches what Intel AoM / Agner / llvm-exegesis report.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleSLM.td
llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index a545f3cecb7c..2bcb33e6b0bc 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -372,8 +372,8 @@ defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
-defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 5, [2], 2>;
-defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 5, [2], 2>;
+defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 5, [2]>;
+defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 5, [2]>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
index 8c5fff166cab..dfdfa1320a2a 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
@@ -563,8 +563,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pextrw $1, %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 pinsrw $1, %eax, %xmm0
# CHECK-NEXT: 1 4 1.00 * pinsrw $1, (%rax), %xmm0
-# CHECK-NEXT: 2 5 2.00 pmaddwd %xmm0, %xmm2
-# CHECK-NEXT: 2 8 2.00 * pmaddwd (%rax), %xmm2
+# CHECK-NEXT: 1 5 2.00 pmaddwd %xmm0, %xmm2
+# CHECK-NEXT: 1 8 2.00 * pmaddwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * pmaxsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxub %xmm0, %xmm2
@@ -574,16 +574,16 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * pminub (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmovmskb %xmm0, %ecx
-# CHECK-NEXT: 2 5 2.00 pmulhuw %xmm0, %xmm2
-# CHECK-NEXT: 2 8 2.00 * pmulhuw (%rax), %xmm2
-# CHECK-NEXT: 2 5 2.00 pmulhw %xmm0, %xmm2
-# CHECK-NEXT: 2 8 2.00 * pmulhw (%rax), %xmm2
-# CHECK-NEXT: 2 5 2.00 pmullw %xmm0, %xmm2
-# CHECK-NEXT: 2 8 2.00 * pmullw (%rax), %xmm2
+# CHECK-NEXT: 1 5 2.00 pmulhuw %xmm0, %xmm2
+# CHECK-NEXT: 1 8 2.00 * pmulhuw (%rax), %xmm2
+# CHECK-NEXT: 1 5 2.00 pmulhw %xmm0, %xmm2
+# CHECK-NEXT: 1 8 2.00 * pmulhw (%rax), %xmm2
+# CHECK-NEXT: 1 5 2.00 pmullw %xmm0, %xmm2
+# CHECK-NEXT: 1 8 2.00 * pmullw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmuludq %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * pmuludq (%rax), %mm2
-# CHECK-NEXT: 2 5 2.00 pmuludq %xmm0, %xmm2
-# CHECK-NEXT: 2 8 2.00 * pmuludq (%rax), %xmm2
+# CHECK-NEXT: 1 5 2.00 pmuludq %xmm0, %xmm2
+# CHECK-NEXT: 1 8 2.00 * pmuludq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 por %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * por (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 psadbw %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
index 2bcebead6181..e0e19e681853 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
@@ -237,8 +237,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 * pmovzxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pmovzxwq %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * pmovzxwq (%rax), %xmm2
-# CHECK-NEXT: 2 5 2.00 pmuldq %xmm0, %xmm2
-# CHECK-NEXT: 2 8 2.00 * pmuldq (%rax), %xmm2
+# CHECK-NEXT: 1 5 2.00 pmuldq %xmm0, %xmm2
+# CHECK-NEXT: 1 8 2.00 * pmuldq (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmulld %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * pmulld (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 ptest %xmm0, %xmm1
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
index f6c1bfe3bae8..3fb48787d929 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
@@ -148,12 +148,12 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %mm2
-# CHECK-NEXT: 2 5 2.00 pmaddubsw %xmm0, %xmm2
-# CHECK-NEXT: 2 8 2.00 * pmaddubsw (%rax), %xmm2
+# CHECK-NEXT: 1 5 2.00 pmaddubsw %xmm0, %xmm2
+# CHECK-NEXT: 1 8 2.00 * pmaddubsw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmulhrsw %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * pmulhrsw (%rax), %mm2
-# CHECK-NEXT: 2 5 2.00 pmulhrsw %xmm0, %xmm2
-# CHECK-NEXT: 2 8 2.00 * pmulhrsw (%rax), %xmm2
+# CHECK-NEXT: 1 5 2.00 pmulhrsw %xmm0, %xmm2
+# CHECK-NEXT: 1 8 2.00 * pmulhrsw (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pshufb %mm0, %mm2
# CHECK-NEXT: 1 4 1.00 * pshufb (%rax), %mm2
# CHECK-NEXT: 4 5 5.00 pshufb %xmm0, %xmm2
More information about the llvm-commits
mailing list