[llvm] 994da65 - [X86][SLM] WriteVecIMul instructions only take 1uop

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 4 05:21:59 PDT 2021


Author: Simon Pilgrim
Date: 2021-09-04T13:21:34+01:00
New Revision: 994da657076900f5ad7fe593c3b5e5f89ab3d53d

URL: https://github.com/llvm/llvm-project/commit/994da657076900f5ad7fe593c3b5e5f89ab3d53d
DIFF: https://github.com/llvm/llvm-project/commit/994da657076900f5ad7fe593c3b5e5f89ab3d53d.diff

LOG: [X86][SLM] WriteVecIMul instructions only take 1uop

The xmm variants have half the throughput (and +1cy latency) of the mmx variants, but are still 1 uop.

I still need to do more thorough testing of SLM on test-suite before fixing the obvious bad numbers for WritePMULLD.

But this helps the D103695 helper script get more accurate numbers for vXi32 multiplies of extended operands (i.e. where we can use PMADDWD, PMULLW/PMULHW, etc.). This matches what Intel AoM / Agner / llvm-exegesis report.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleSLM.td
    llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
    llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
    llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index a545f3cecb7c..2bcb33e6b0bc 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -372,8 +372,8 @@ defm : SLMWriteResPair<WriteVecALUX,  [SLM_FPC_RSV01],  1>;
 defm : SLMWriteResPair<WriteVecALUY,  [SLM_FPC_RSV01],  1>;
 defm : X86WriteResPairUnsupported<WriteVecALUZ>;
 defm : SLMWriteResPair<WriteVecIMul,  [SLM_FPC_RSV0],   4>;
-defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0],   5, [2], 2>;
-defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0],   5, [2], 2>;
+defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0],   5, [2]>;
+defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0],   5, [2]>;
 defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
 // FIXME: The below is closer to correct, but caused some perf regressions.
 //defm : SLMWriteResPair<WritePMULLD,  [SLM_FPC_RSV0],   11, [11], 7>;

diff  --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
index 8c5fff166cab..dfdfa1320a2a 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
@@ -563,8 +563,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        pextrw	$1, %xmm0, %ecx
 # CHECK-NEXT:  1      1     1.00                        pinsrw	$1, %eax, %xmm0
 # CHECK-NEXT:  1      4     1.00    *                   pinsrw	$1, (%rax), %xmm0
-# CHECK-NEXT:  2      5     2.00                        pmaddwd	%xmm0, %xmm2
-# CHECK-NEXT:  2      8     2.00    *                   pmaddwd	(%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        pmaddwd	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   pmaddwd	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        pmaxsw	%xmm0, %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   pmaxsw	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        pmaxub	%xmm0, %xmm2
@@ -574,16 +574,16 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        pminub	%xmm0, %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   pminub	(%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        pmovmskb	%xmm0, %ecx
-# CHECK-NEXT:  2      5     2.00                        pmulhuw	%xmm0, %xmm2
-# CHECK-NEXT:  2      8     2.00    *                   pmulhuw	(%rax), %xmm2
-# CHECK-NEXT:  2      5     2.00                        pmulhw	%xmm0, %xmm2
-# CHECK-NEXT:  2      8     2.00    *                   pmulhw	(%rax), %xmm2
-# CHECK-NEXT:  2      5     2.00                        pmullw	%xmm0, %xmm2
-# CHECK-NEXT:  2      8     2.00    *                   pmullw	(%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        pmulhuw	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   pmulhuw	(%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        pmulhw	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   pmulhw	(%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        pmullw	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   pmullw	(%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        pmuludq	%mm0, %mm2
 # CHECK-NEXT:  1      7     1.00    *                   pmuludq	(%rax), %mm2
-# CHECK-NEXT:  2      5     2.00                        pmuludq	%xmm0, %xmm2
-# CHECK-NEXT:  2      8     2.00    *                   pmuludq	(%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        pmuludq	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   pmuludq	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        por	%xmm0, %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   por	(%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        psadbw	%xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
index 2bcebead6181..e0e19e681853 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
@@ -237,8 +237,8 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   pmovzxwd	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        pmovzxwq	%xmm0, %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   pmovzxwq	(%rax), %xmm2
-# CHECK-NEXT:  2      5     2.00                        pmuldq	%xmm0, %xmm2
-# CHECK-NEXT:  2      8     2.00    *                   pmuldq	(%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        pmuldq	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   pmuldq	(%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        pmulld	%xmm0, %xmm2
 # CHECK-NEXT:  1      7     1.00    *                   pmulld	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        ptest	%xmm0, %xmm1

diff  --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
index f6c1bfe3bae8..3fb48787d929 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
@@ -148,12 +148,12 @@ psignw      (%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   phsubw	(%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        pmaddubsw	%mm0, %mm2
 # CHECK-NEXT:  1      7     1.00    *                   pmaddubsw	(%rax), %mm2
-# CHECK-NEXT:  2      5     2.00                        pmaddubsw	%xmm0, %xmm2
-# CHECK-NEXT:  2      8     2.00    *                   pmaddubsw	(%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        pmaddubsw	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   pmaddubsw	(%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        pmulhrsw	%mm0, %mm2
 # CHECK-NEXT:  1      7     1.00    *                   pmulhrsw	(%rax), %mm2
-# CHECK-NEXT:  2      5     2.00                        pmulhrsw	%xmm0, %xmm2
-# CHECK-NEXT:  2      8     2.00    *                   pmulhrsw	(%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        pmulhrsw	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   pmulhrsw	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        pshufb	%mm0, %mm2
 # CHECK-NEXT:  1      4     1.00    *                   pshufb	(%rax), %mm2
 # CHECK-NEXT:  4      5     5.00                        pshufb	%xmm0, %xmm2


        


More information about the llvm-commits mailing list