[llvm] 9e3e8b5 - [X86] VPERM2*128 instructions aren't microcoded on znver1

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 19 02:05:32 PDT 2024


Author: Simon Pilgrim
Date: 2024-08-19T10:05:15+01:00
New Revision: 9e3e8b5715d01fc7ac6b0bdcd1870f3823f3ab30

URL: https://github.com/llvm/llvm-project/commit/9e3e8b5715d01fc7ac6b0bdcd1870f3823f3ab30
DIFF: https://github.com/llvm/llvm-project/commit/9e3e8b5715d01fc7ac6b0bdcd1870f3823f3ab30.diff

LOG: [X86] VPERM2*128 instructions aren't microcoded on znver1

AMD refer to them as microcoded, but not in the sense that LLVM uses the term (WriteMicrocoded) - the uop count and pipe usage are high but predictable.

Confirmed with Agner + uops.info.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleZnver1.td
    llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 7ee9eadf843908..a044ddc3001329 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -986,10 +986,21 @@ def : InstRW<[ZnWritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
 //-- Move instructions --//
 
 // VPERM2F128 / VPERM2I128.
-def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr,
-                                        VPERM2I128rr)>;
-def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm,
-                                        VPERM2I128rm)>;
+def ZnWriteVPERM2r : SchedWriteRes<[ZnFPU0, ZnFPU12]> {
+  let NumMicroOps = 8;
+  let Latency = 3;
+  let ReleaseAtCycles = [3,3];
+}
+def : InstRW<[ZnWriteVPERM2r], (instrs VPERM2F128rr,
+                                       VPERM2I128rr)>;
+
+def ZnWriteVPERM2m : SchedWriteRes<[ZnAGU, ZnFPU0, ZnFPU12]> {
+  let NumMicroOps = 12;
+  let Latency = 8;
+  let ReleaseAtCycles = [1,3,3];
+}
+def : InstRW<[ZnWriteVPERM2m], (instrs VPERM2F128rm,
+                                       VPERM2I128rm)>;
 
 def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> {
   let NumMicroOps = 2;

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
index 3914f865089247..00e69d8960565f 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
@@ -1429,8 +1429,8 @@ vzeroupper
 # CHECK-NEXT:  1      100   0.25    *                   vpcmpistri	$1, (%rax), %xmm2
 # CHECK-NEXT:  1      100   0.25                        vpcmpistrm	$1, %xmm0, %xmm2
 # CHECK-NEXT:  1      100   0.25    *                   vpcmpistrm	$1, (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        vperm2f128	$1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vperm2f128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  8      3     3.00                        vperm2f128	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  12     8     3.00    *                   vperm2f128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vpermilpd	$1, %xmm0, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpermilpd	$1, (%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        vpermilpd	%xmm0, %xmm1, %xmm2
@@ -1738,7 +1738,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 175.00 175.00  -      -      -      -      -     144.25 227.25 223.75 315.75  -
+# CHECK-NEXT: 175.50 175.50  -      -      -      -      -     150.25 230.25 226.75 315.75  -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -2141,8 +2141,8 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vpcmpistri	$1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vpcmpistrm	$1, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vpcmpistrm	$1, (%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vperm2f128	$1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vperm2f128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     3.00   1.50   1.50    -      -     vperm2f128	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     3.00   1.50   1.50    -      -     vperm2f128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd	$1, %xmm0, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd	$1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd	%xmm0, %xmm1, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
index 93b601e9285f16..a3ebe63e718986 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
@@ -558,8 +558,8 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      8     1.00    *                   vpcmpgtq	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     0.67                        vpcmpgtw	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.67    *                   vpcmpgtw	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25                        vperm2i128	$1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vperm2i128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  8      3     3.00                        vperm2i128	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  12     8     3.00    *                   vperm2i128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      2     1.00                        vpermd	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      9     1.00    *                   vpermd	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      2     1.00                        vpermpd	$1, %ymm0, %ymm2
@@ -778,7 +778,7 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 66.50  66.50   -      -      -      -      -     120.17 239.17 158.00 66.67   -
+# CHECK-NEXT: 67.00  67.00   -      -      -      -      -     126.17 242.17 161.00 66.67   -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -880,8 +880,8 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -     1.00    -     vpcmpgtq	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.67   0.67    -     0.67    -     vpcmpgtw	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.67   0.67    -     0.67    -     vpcmpgtw	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vperm2i128	$1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vperm2i128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     3.00   1.50   1.50    -      -     vperm2i128	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     3.00   1.50   1.50    -      -     vperm2i128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vpermd	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vpermd	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vpermpd	$1, %ymm0, %ymm2


        


More information about the llvm-commits mailing list