[llvm] 5ebe95e - [X86][Atom] Fix integer shuffles uops, latency and throughput
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 17 04:12:48 PDT 2021
Author: Simon Pilgrim
Date: 2021-09-17T12:11:54+01:00
New Revision: 5ebe95e256737506f152395b5a3eb66cf448e969
URL: https://github.com/llvm/llvm-project/commit/5ebe95e256737506f152395b5a3eb66cf448e969
DIFF: https://github.com/llvm/llvm-project/commit/5ebe95e256737506f152395b5a3eb66cf448e969.diff
LOG: [X86][Atom] Fix integer shuffles uops, latency and throughput
The MMX pack/unpck shuffles don't need an override - they have the same behaviour as other shuffles (Port0 only).
The SSE pslldq/psrldq shuffles don't need an override - they have the same behaviour as other shuffles (Port0 only).
The SSE pshufb shuffles use 4 uops (5 uops with a folded load).
Noticed the pslldq/psrldq issue while trying to improve reduction costs via the D103695 helper script, and fixed the others while reviewing. Confirmed against the Intel Architectures Optimization Manual (AoM), Agner Fog's instruction tables, and InstLatX64.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleAtom.td
llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s
llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index 796baa25a699..e9020f54f222 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -424,7 +424,7 @@ defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>;
-defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 4, 5, [3,3], [4,4]>;
+defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 4, 5, [3,3], [4,4], 4, 5>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteBlend>;
@@ -558,10 +558,7 @@ def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT,
LFENCE,
STOSB, STOSL, STOSQ, STOSW,
- MOVSSrr, MOVSSrr_REV,
- PSLLDQri, PSRLDQri)>;
-def : InstRW<[AtomWrite01_1], (instregex "MMX_PACK(SSDW|SSWB|USWB)irr",
- "MMX_PUNPCKH(BW|DQ|WD)irr")>;
+ MOVSSrr, MOVSSrr_REV)>;
def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
let Latency = 2;
diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s
index 1bc1ab078c76..bc929e14fc7f 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s
@@ -173,11 +173,11 @@ pxor (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 * movq (%rax), %mm2
# CHECK-NEXT: 1 3 3.00 movq %mm0, %rcx
# CHECK-NEXT: 1 1 1.00 * movq %mm0, (%rax)
-# CHECK-NEXT: 1 1 0.50 packsswb %mm0, %mm2
+# CHECK-NEXT: 1 1 1.00 packsswb %mm0, %mm2
# CHECK-NEXT: 1 1 1.00 * packsswb (%rax), %mm2
-# CHECK-NEXT: 1 1 0.50 packssdw %mm0, %mm2
+# CHECK-NEXT: 1 1 1.00 packssdw %mm0, %mm2
# CHECK-NEXT: 1 1 1.00 * packssdw (%rax), %mm2
-# CHECK-NEXT: 1 1 0.50 packuswb %mm0, %mm2
+# CHECK-NEXT: 1 1 1.00 packuswb %mm0, %mm2
# CHECK-NEXT: 1 1 1.00 * packuswb (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 paddb %mm0, %mm2
# CHECK-NEXT: 1 1 1.00 * paddb (%rax), %mm2
@@ -255,11 +255,11 @@ pxor (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 * psubusw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 psubw %mm0, %mm2
# CHECK-NEXT: 1 1 1.00 * psubw (%rax), %mm2
-# CHECK-NEXT: 1 1 0.50 punpckhbw %mm0, %mm2
+# CHECK-NEXT: 1 1 1.00 punpckhbw %mm0, %mm2
# CHECK-NEXT: 1 1 1.00 * punpckhbw (%rax), %mm2
-# CHECK-NEXT: 1 1 0.50 punpckhdq %mm0, %mm2
+# CHECK-NEXT: 1 1 1.00 punpckhdq %mm0, %mm2
# CHECK-NEXT: 1 1 1.00 * punpckhdq (%rax), %mm2
-# CHECK-NEXT: 1 1 0.50 punpckhwd %mm0, %mm2
+# CHECK-NEXT: 1 1 1.00 punpckhwd %mm0, %mm2
# CHECK-NEXT: 1 1 1.00 * punpckhwd (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 punpcklbw %mm0, %mm2
# CHECK-NEXT: 1 1 1.00 * punpcklbw (%rax), %mm2
@@ -276,7 +276,7 @@ pxor (%rax), %mm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
-# CHECK-NEXT: 103.50 41.50
+# CHECK-NEXT: 106.50 38.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
@@ -289,11 +289,11 @@ pxor (%rax), %mm2
# CHECK-NEXT: 1.00 - movq (%rax), %mm2
# CHECK-NEXT: 3.00 - movq %mm0, %rcx
# CHECK-NEXT: 1.00 - movq %mm0, (%rax)
-# CHECK-NEXT: 0.50 0.50 packsswb %mm0, %mm2
+# CHECK-NEXT: 1.00 - packsswb %mm0, %mm2
# CHECK-NEXT: 1.00 - packsswb (%rax), %mm2
-# CHECK-NEXT: 0.50 0.50 packssdw %mm0, %mm2
+# CHECK-NEXT: 1.00 - packssdw %mm0, %mm2
# CHECK-NEXT: 1.00 - packssdw (%rax), %mm2
-# CHECK-NEXT: 0.50 0.50 packuswb %mm0, %mm2
+# CHECK-NEXT: 1.00 - packuswb %mm0, %mm2
# CHECK-NEXT: 1.00 - packuswb (%rax), %mm2
# CHECK-NEXT: 0.50 0.50 paddb %mm0, %mm2
# CHECK-NEXT: 1.00 - paddb (%rax), %mm2
@@ -371,11 +371,11 @@ pxor (%rax), %mm2
# CHECK-NEXT: 1.00 - psubusw (%rax), %mm2
# CHECK-NEXT: 0.50 0.50 psubw %mm0, %mm2
# CHECK-NEXT: 1.00 - psubw (%rax), %mm2
-# CHECK-NEXT: 0.50 0.50 punpckhbw %mm0, %mm2
+# CHECK-NEXT: 1.00 - punpckhbw %mm0, %mm2
# CHECK-NEXT: 1.00 - punpckhbw (%rax), %mm2
-# CHECK-NEXT: 0.50 0.50 punpckhdq %mm0, %mm2
+# CHECK-NEXT: 1.00 - punpckhdq %mm0, %mm2
# CHECK-NEXT: 1.00 - punpckhdq (%rax), %mm2
-# CHECK-NEXT: 0.50 0.50 punpckhwd %mm0, %mm2
+# CHECK-NEXT: 1.00 - punpckhwd %mm0, %mm2
# CHECK-NEXT: 1.00 - punpckhwd (%rax), %mm2
# CHECK-NEXT: 1.00 - punpcklbw %mm0, %mm2
# CHECK-NEXT: 1.00 - punpcklbw (%rax), %mm2
diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
index 4536b7f21a7b..2cd1d8d93c0c 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s
@@ -597,7 +597,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pslld $1, %xmm2
# CHECK-NEXT: 1 2 1.00 pslld %xmm0, %xmm2
# CHECK-NEXT: 1 3 2.00 * pslld (%rax), %xmm2
-# CHECK-NEXT: 1 1 0.50 pslldq $1, %xmm2
+# CHECK-NEXT: 1 1 1.00 pslldq $1, %xmm2
# CHECK-NEXT: 1 1 1.00 psllq $1, %xmm2
# CHECK-NEXT: 1 2 1.00 psllq %xmm0, %xmm2
# CHECK-NEXT: 1 3 2.00 * psllq (%rax), %xmm2
@@ -613,7 +613,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psrld $1, %xmm2
# CHECK-NEXT: 1 2 1.00 psrld %xmm0, %xmm2
# CHECK-NEXT: 1 3 2.00 * psrld (%rax), %xmm2
-# CHECK-NEXT: 1 1 0.50 psrldq $1, %xmm2
+# CHECK-NEXT: 1 1 1.00 psrldq $1, %xmm2
# CHECK-NEXT: 1 1 1.00 psrlq $1, %xmm2
# CHECK-NEXT: 1 2 1.00 psrlq %xmm0, %xmm2
# CHECK-NEXT: 1 3 2.00 * psrlq (%rax), %xmm2
@@ -681,7 +681,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
-# CHECK-NEXT: 912.00 774.00
+# CHECK-NEXT: 913.00 773.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
@@ -875,7 +875,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1.00 - pslld $1, %xmm2
# CHECK-NEXT: 1.00 1.00 pslld %xmm0, %xmm2
# CHECK-NEXT: 2.00 2.00 pslld (%rax), %xmm2
-# CHECK-NEXT: 0.50 0.50 pslldq $1, %xmm2
+# CHECK-NEXT: 1.00 - pslldq $1, %xmm2
# CHECK-NEXT: 1.00 - psllq $1, %xmm2
# CHECK-NEXT: 1.00 1.00 psllq %xmm0, %xmm2
# CHECK-NEXT: 2.00 2.00 psllq (%rax), %xmm2
@@ -891,7 +891,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1.00 - psrld $1, %xmm2
# CHECK-NEXT: 1.00 1.00 psrld %xmm0, %xmm2
# CHECK-NEXT: 2.00 2.00 psrld (%rax), %xmm2
-# CHECK-NEXT: 0.50 0.50 psrldq $1, %xmm2
+# CHECK-NEXT: 1.00 - psrldq $1, %xmm2
# CHECK-NEXT: 1.00 - psrlq $1, %xmm2
# CHECK-NEXT: 1.00 1.00 psrlq %xmm0, %xmm2
# CHECK-NEXT: 2.00 2.00 psrlq (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s
index dc2e41a8a974..b04be1a3fdc2 100644
--- a/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s
+++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s
@@ -156,8 +156,8 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 1 5 2.00 * pmulhrsw (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pshufb %mm0, %mm2
# CHECK-NEXT: 1 1 1.00 * pshufb (%rax), %mm2
-# CHECK-NEXT: 1 4 3.00 pshufb %xmm0, %xmm2
-# CHECK-NEXT: 1 5 4.00 * pshufb (%rax), %xmm2
+# CHECK-NEXT: 4 4 3.00 pshufb %xmm0, %xmm2
+# CHECK-NEXT: 5 5 4.00 * pshufb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psignb %mm0, %mm2
# CHECK-NEXT: 1 1 1.00 * psignb (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 psignb %xmm0, %xmm2
More information about the llvm-commits mailing list