[llvm] 484944a - [X86][SLM] Fix HADD/HSUB uops, latency and throughput

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 11 03:44:52 PDT 2021


Author: Simon Pilgrim
Date: 2021-09-11T11:44:09+01:00
New Revision: 484944ac3b10530343df8461554b12190bbde9e9

URL: https://github.com/llvm/llvm-project/commit/484944ac3b10530343df8461554b12190bbde9e9
DIFF: https://github.com/llvm/llvm-project/commit/484944ac3b10530343df8461554b12190bbde9e9.diff

LOG: [X86][SLM] Fix HADD/HSUB uops, latency and throughput

Noticed while trying to improve the generic reduction costs using the helper script from D103695. Confirmed against the Intel Architectures Optimization Manual (AoM), Agner Fog's instruction tables, and InstLatX64 measurements.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleSLM.td
    llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
    llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index e5bc42a773de6..bd7b56a57360e 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -420,12 +420,12 @@ def  : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
 // Horizontal add/sub  instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : SLMWriteResPair<WriteFHAdd,   [SLM_FPC_RSV01], 6, [6], 4>;
-defm : SLMWriteResPair<WriteFHAddY,  [SLM_FPC_RSV01], 6, [6], 4>;
+defm : SLMWriteResPair<WriteFHAdd,   [SLM_FPC_RSV1],  6, [6], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteFHAddY>;
 defm : X86WriteResPairUnsupported<WriteFHAddZ>;
-defm : SLMWriteResPair<WritePHAdd,   [SLM_FPC_RSV01], 1>;
-defm : SLMWriteResPair<WritePHAddX,  [SLM_FPC_RSV01], 1>;
-defm : SLMWriteResPair<WritePHAddY,  [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WritePHAdd,   [SLM_FPC_RSV01], 6, [6], 3, 1>;
+defm : SLMWriteResPair<WritePHAddX,  [SLM_FPC_RSV01], 6, [6], 3, 1>;
+defm : X86WriteResPairUnsupported<WritePHAddY>;
 defm : X86WriteResPairUnsupported<WritePHAddZ>;
 
 // String instructions.

diff  --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
index 23949737b3cad..bb34d31a93ae8 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
@@ -47,14 +47,14 @@ mwait
 # CHECK-NEXT:  1      7     2.00    *                   addsubpd	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        addsubps	%xmm0, %xmm2
 # CHECK-NEXT:  1      6     1.00    *                   addsubps	(%rax), %xmm2
-# CHECK-NEXT:  4      6     3.00                        haddpd	%xmm0, %xmm2
-# CHECK-NEXT:  4      9     3.00    *                   haddpd	(%rax), %xmm2
-# CHECK-NEXT:  4      6     3.00                        haddps	%xmm0, %xmm2
-# CHECK-NEXT:  4      9     3.00    *                   haddps	(%rax), %xmm2
-# CHECK-NEXT:  4      6     3.00                        hsubpd	%xmm0, %xmm2
-# CHECK-NEXT:  4      9     3.00    *                   hsubpd	(%rax), %xmm2
-# CHECK-NEXT:  4      6     3.00                        hsubps	%xmm0, %xmm2
-# CHECK-NEXT:  4      9     3.00    *                   hsubps	(%rax), %xmm2
+# CHECK-NEXT:  4      6     6.00                        haddpd	%xmm0, %xmm2
+# CHECK-NEXT:  5      9     6.00    *                   haddpd	(%rax), %xmm2
+# CHECK-NEXT:  4      6     6.00                        haddps	%xmm0, %xmm2
+# CHECK-NEXT:  5      9     6.00    *                   haddps	(%rax), %xmm2
+# CHECK-NEXT:  4      6     6.00                        hsubpd	%xmm0, %xmm2
+# CHECK-NEXT:  5      9     6.00    *                   hsubpd	(%rax), %xmm2
+# CHECK-NEXT:  4      6     6.00                        hsubps	%xmm0, %xmm2
+# CHECK-NEXT:  5      9     6.00    *                   hsubps	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00    *                   lddqu	(%rax), %xmm2
 # CHECK-NEXT:  1      100   1.00                  U     monitor
 # CHECK-NEXT:  1      1     1.00                        movddup	%xmm0, %xmm2
@@ -77,7 +77,7 @@ mwait
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -      -      -     32.00  30.00   -      -     10.00
+# CHECK-NEXT:  -      -      -     8.00   54.00   -      -     10.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
@@ -85,14 +85,14 @@ mwait
 # CHECK-NEXT:  -      -      -      -     2.00    -      -     1.00   addsubpd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     addsubps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   addsubps	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     haddpd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   haddpd	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     haddps	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   haddps	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     hsubpd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   hsubpd	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     hsubps	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   hsubps	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -      -     haddpd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -     1.00   haddpd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -      -     haddps	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -     1.00   haddps	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -      -     hsubpd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -     1.00   hsubpd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -      -     hsubps	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -     1.00   hsubps	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00   lddqu	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     monitor
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     movddup	%xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
index 3fb48787d929f..e74a73f5bb3d5 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
@@ -122,30 +122,30 @@ psignw      (%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   palignr	$1, (%rax), %mm2
 # CHECK-NEXT:  1      1     1.00                        palignr	$1, %xmm0, %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   palignr	$1, (%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phaddd	%mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddd	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phaddd	%xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddd	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phaddsw	%mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddsw	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phaddsw	%xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddsw	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phaddw	%mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddw	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phaddw	%xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddw	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phsubd	%mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubd	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phsubd	%xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubd	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phsubsw	%mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubsw	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phsubsw	%xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubsw	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phsubw	%mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubw	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phsubw	%xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubw	(%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phaddd	%mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddd	(%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phaddd	%xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddd	(%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phaddsw	%mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddsw	(%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phaddsw	%xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddsw	(%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phaddw	%mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddw	(%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phaddw	%xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddw	(%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phsubd	%mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubd	(%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phsubd	%xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubd	(%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phsubsw	%mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubsw	(%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phsubsw	%xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubsw	(%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phsubw	%mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubw	(%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phsubw	%xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubw	(%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        pmaddubsw	%mm0, %mm2
 # CHECK-NEXT:  1      7     1.00    *                   pmaddubsw	(%rax), %mm2
 # CHECK-NEXT:  1      5     2.00                        pmaddubsw	%xmm0, %xmm2
@@ -183,7 +183,7 @@ psignw      (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -      -      -     52.00  24.00   -      -     32.00
+# CHECK-NEXT:  -      -      -     112.00 84.00   -      -     32.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
@@ -203,30 +203,30 @@ psignw      (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   palignr	$1, (%rax), %mm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     palignr	$1, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   palignr	$1, (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddd	%mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddd	(%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddd	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddsw	%mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddsw	(%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddsw	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddsw	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddw	%mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddw	(%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddw	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddw	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubd	%mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubd	(%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubd	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubsw	%mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubsw	(%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubsw	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubsw	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubw	%mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubw	(%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubw	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubw	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddd	%mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddd	(%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddsw	%mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddsw	(%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddsw	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddsw	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddw	%mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddw	(%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddw	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddw	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubd	%mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubd	(%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubsw	%mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubsw	(%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubsw	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubsw	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubw	%mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubw	(%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubw	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubw	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     pmaddubsw	%mm0, %mm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   pmaddubsw	(%rax), %mm2
 # CHECK-NEXT:  -      -      -     2.00    -      -      -      -     pmaddubsw	%xmm0, %xmm2


        


More information about the llvm-commits mailing list