[llvm] 5ebe95e - [X86][Atom] Fix integer shuffles uops, latency and throughput

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 17 04:12:48 PDT 2021


Author: Simon Pilgrim
Date: 2021-09-17T12:11:54+01:00
New Revision: 5ebe95e256737506f152395b5a3eb66cf448e969

URL: https://github.com/llvm/llvm-project/commit/5ebe95e256737506f152395b5a3eb66cf448e969
DIFF: https://github.com/llvm/llvm-project/commit/5ebe95e256737506f152395b5a3eb66cf448e969.diff

LOG: [X86][Atom] Fix integer shuffles uops, latency and throughput

The MMX pack/punpckh shuffles don't need an override - they have the same behaviour as other shuffles (Port0 only).
The SSE pslldq/psrldq shuffles don't need an override - they have the same behaviour as other shuffles (Port0 only).
The SSE pshufb shuffles use 4 uops (5 uops with a folded load).

Noticed the pslldq/psrldq issue while trying to improve reduction costs via the D103695 helper script, and fixed the others while reviewing. Confirmed against the Intel AoM, Agner Fog's instruction tables and InstLatX64.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleAtom.td
    llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s
    llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
    llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index 796baa25a699..e9020f54f222 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -424,7 +424,7 @@ defm : AtomWriteResPair<WriteShuffleX,      [AtomPort0],  [AtomPort0], 1, 1>;
 defm : X86WriteResPairUnsupported<WriteShuffleY>;
 defm : X86WriteResPairUnsupported<WriteShuffleZ>;
 defm : AtomWriteResPair<WriteVarShuffle,    [AtomPort0],  [AtomPort0], 1, 1>;
-defm : AtomWriteResPair<WriteVarShuffleX,  [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 4, 5, [3,3], [4,4]>;
+defm : AtomWriteResPair<WriteVarShuffleX,  [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 4, 5, [3,3], [4,4], 4, 5>;
 defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
 defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
 defm : X86WriteResPairUnsupported<WriteBlend>;
@@ -558,10 +558,7 @@ def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
 def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT,
                                       LFENCE,
                                       STOSB, STOSL, STOSQ, STOSW,
-                                      MOVSSrr, MOVSSrr_REV,
-                                      PSLLDQri, PSRLDQri)>;
-def : InstRW<[AtomWrite01_1], (instregex "MMX_PACK(SSDW|SSWB|USWB)irr",
-                                         "MMX_PUNPCKH(BW|DQ|WD)irr")>;
+                                      MOVSSrr, MOVSSrr_REV)>;
 
 def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
   let Latency = 2;

diff  --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s
index 1bc1ab078c76..bc929e14fc7f 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s
@@ -173,11 +173,11 @@ pxor        (%rax), %mm2
 # CHECK-NEXT:  1      1     1.00    *                   movq	(%rax), %mm2
 # CHECK-NEXT:  1      3     3.00                        movq	%mm0, %rcx
 # CHECK-NEXT:  1      1     1.00           *            movq	%mm0, (%rax)
-# CHECK-NEXT:  1      1     0.50                        packsswb	%mm0, %mm2
+# CHECK-NEXT:  1      1     1.00                        packsswb	%mm0, %mm2
 # CHECK-NEXT:  1      1     1.00    *                   packsswb	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        packssdw	%mm0, %mm2
+# CHECK-NEXT:  1      1     1.00                        packssdw	%mm0, %mm2
 # CHECK-NEXT:  1      1     1.00    *                   packssdw	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        packuswb	%mm0, %mm2
+# CHECK-NEXT:  1      1     1.00                        packuswb	%mm0, %mm2
 # CHECK-NEXT:  1      1     1.00    *                   packuswb	(%rax), %mm2
 # CHECK-NEXT:  1      1     0.50                        paddb	%mm0, %mm2
 # CHECK-NEXT:  1      1     1.00    *                   paddb	(%rax), %mm2
@@ -255,11 +255,11 @@ pxor        (%rax), %mm2
 # CHECK-NEXT:  1      1     1.00    *                   psubusw	(%rax), %mm2
 # CHECK-NEXT:  1      1     0.50                        psubw	%mm0, %mm2
 # CHECK-NEXT:  1      1     1.00    *                   psubw	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        punpckhbw	%mm0, %mm2
+# CHECK-NEXT:  1      1     1.00                        punpckhbw	%mm0, %mm2
 # CHECK-NEXT:  1      1     1.00    *                   punpckhbw	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        punpckhdq	%mm0, %mm2
+# CHECK-NEXT:  1      1     1.00                        punpckhdq	%mm0, %mm2
 # CHECK-NEXT:  1      1     1.00    *                   punpckhdq	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        punpckhwd	%mm0, %mm2
+# CHECK-NEXT:  1      1     1.00                        punpckhwd	%mm0, %mm2
 # CHECK-NEXT:  1      1     1.00    *                   punpckhwd	(%rax), %mm2
 # CHECK-NEXT:  1      1     1.00                        punpcklbw	%mm0, %mm2
 # CHECK-NEXT:  1      1     1.00    *                   punpcklbw	(%rax), %mm2
@@ -276,7 +276,7 @@ pxor        (%rax), %mm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]
-# CHECK-NEXT: 103.50 41.50
+# CHECK-NEXT: 106.50 38.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    Instructions:
@@ -289,11 +289,11 @@ pxor        (%rax), %mm2
 # CHECK-NEXT: 1.00    -     movq	(%rax), %mm2
 # CHECK-NEXT: 3.00    -     movq	%mm0, %rcx
 # CHECK-NEXT: 1.00    -     movq	%mm0, (%rax)
-# CHECK-NEXT: 0.50   0.50   packsswb	%mm0, %mm2
+# CHECK-NEXT: 1.00    -     packsswb	%mm0, %mm2
 # CHECK-NEXT: 1.00    -     packsswb	(%rax), %mm2
-# CHECK-NEXT: 0.50   0.50   packssdw	%mm0, %mm2
+# CHECK-NEXT: 1.00    -     packssdw	%mm0, %mm2
 # CHECK-NEXT: 1.00    -     packssdw	(%rax), %mm2
-# CHECK-NEXT: 0.50   0.50   packuswb	%mm0, %mm2
+# CHECK-NEXT: 1.00    -     packuswb	%mm0, %mm2
 # CHECK-NEXT: 1.00    -     packuswb	(%rax), %mm2
 # CHECK-NEXT: 0.50   0.50   paddb	%mm0, %mm2
 # CHECK-NEXT: 1.00    -     paddb	(%rax), %mm2
@@ -371,11 +371,11 @@ pxor        (%rax), %mm2
 # CHECK-NEXT: 1.00    -     psubusw	(%rax), %mm2
 # CHECK-NEXT: 0.50   0.50   psubw	%mm0, %mm2
 # CHECK-NEXT: 1.00    -     psubw	(%rax), %mm2
-# CHECK-NEXT: 0.50   0.50   punpckhbw	%mm0, %mm2
+# CHECK-NEXT: 1.00    -     punpckhbw	%mm0, %mm2
 # CHECK-NEXT: 1.00    -     punpckhbw	(%rax), %mm2
-# CHECK-NEXT: 0.50   0.50   punpckhdq	%mm0, %mm2
+# CHECK-NEXT: 1.00    -     punpckhdq	%mm0, %mm2
 # CHECK-NEXT: 1.00    -     punpckhdq	(%rax), %mm2
-# CHECK-NEXT: 0.50   0.50   punpckhwd	%mm0, %mm2
+# CHECK-NEXT: 1.00    -     punpckhwd	%mm0, %mm2
 # CHECK-NEXT: 1.00    -     punpckhwd	(%rax), %mm2
 # CHECK-NEXT: 1.00    -     punpcklbw	%mm0, %mm2
 # CHECK-NEXT: 1.00    -     punpcklbw	(%rax), %mm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
index 4536b7f21a7b..2cd1d8d93c0c 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
@@ -597,7 +597,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        pslld	$1, %xmm2
 # CHECK-NEXT:  1      2     1.00                        pslld	%xmm0, %xmm2
 # CHECK-NEXT:  1      3     2.00    *                   pslld	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        pslldq	$1, %xmm2
+# CHECK-NEXT:  1      1     1.00                        pslldq	$1, %xmm2
 # CHECK-NEXT:  1      1     1.00                        psllq	$1, %xmm2
 # CHECK-NEXT:  1      2     1.00                        psllq	%xmm0, %xmm2
 # CHECK-NEXT:  1      3     2.00    *                   psllq	(%rax), %xmm2
@@ -613,7 +613,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        psrld	$1, %xmm2
 # CHECK-NEXT:  1      2     1.00                        psrld	%xmm0, %xmm2
 # CHECK-NEXT:  1      3     2.00    *                   psrld	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        psrldq	$1, %xmm2
+# CHECK-NEXT:  1      1     1.00                        psrldq	$1, %xmm2
 # CHECK-NEXT:  1      1     1.00                        psrlq	$1, %xmm2
 # CHECK-NEXT:  1      2     1.00                        psrlq	%xmm0, %xmm2
 # CHECK-NEXT:  1      3     2.00    *                   psrlq	(%rax), %xmm2
@@ -681,7 +681,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]
-# CHECK-NEXT: 912.00 774.00
+# CHECK-NEXT: 913.00 773.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    Instructions:
@@ -875,7 +875,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT: 1.00    -     pslld	$1, %xmm2
 # CHECK-NEXT: 1.00   1.00   pslld	%xmm0, %xmm2
 # CHECK-NEXT: 2.00   2.00   pslld	(%rax), %xmm2
-# CHECK-NEXT: 0.50   0.50   pslldq	$1, %xmm2
+# CHECK-NEXT: 1.00    -     pslldq	$1, %xmm2
 # CHECK-NEXT: 1.00    -     psllq	$1, %xmm2
 # CHECK-NEXT: 1.00   1.00   psllq	%xmm0, %xmm2
 # CHECK-NEXT: 2.00   2.00   psllq	(%rax), %xmm2
@@ -891,7 +891,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT: 1.00    -     psrld	$1, %xmm2
 # CHECK-NEXT: 1.00   1.00   psrld	%xmm0, %xmm2
 # CHECK-NEXT: 2.00   2.00   psrld	(%rax), %xmm2
-# CHECK-NEXT: 0.50   0.50   psrldq	$1, %xmm2
+# CHECK-NEXT: 1.00    -     psrldq	$1, %xmm2
 # CHECK-NEXT: 1.00    -     psrlq	$1, %xmm2
 # CHECK-NEXT: 1.00   1.00   psrlq	%xmm0, %xmm2
 # CHECK-NEXT: 2.00   2.00   psrlq	(%rax), %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s
index dc2e41a8a974..b04be1a3fdc2 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s
@@ -156,8 +156,8 @@ psignw      (%rax), %xmm2
 # CHECK-NEXT:  1      5     2.00    *                   pmulhrsw	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        pshufb	%mm0, %mm2
 # CHECK-NEXT:  1      1     1.00    *                   pshufb	(%rax), %mm2
-# CHECK-NEXT:  1      4     3.00                        pshufb	%xmm0, %xmm2
-# CHECK-NEXT:  1      5     4.00    *                   pshufb	(%rax), %xmm2
+# CHECK-NEXT:  4      4     3.00                        pshufb	%xmm0, %xmm2
+# CHECK-NEXT:  5      5     4.00    *                   pshufb	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        psignb	%mm0, %mm2
 # CHECK-NEXT:  1      1     1.00    *                   psignb	(%rax), %mm2
 # CHECK-NEXT:  1      1     0.50                        psignb	%xmm0, %xmm2


        


More information about the llvm-commits mailing list