[llvm] 5ab65a6 - [X86] VPERM2*128 instructions aren't microcoded on znver2

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 19 02:05:27 PDT 2024


Author: Simon Pilgrim
Date: 2024-08-19T10:05:15+01:00
New Revision: 5ab65a6c1c38eef2a096a6b9231281fb4876c387

URL: https://github.com/llvm/llvm-project/commit/5ab65a6c1c38eef2a096a6b9231281fb4876c387
DIFF: https://github.com/llvm/llvm-project/commit/5ab65a6c1c38eef2a096a6b9231281fb4876c387.diff

LOG: [X86] VPERM2*128 instructions aren't microcoded on znver2

This appears to be a copy+paste error from the znver1 model (where these instructions aren't really microcoded either - but they are rather complex!).

Confirmed with Agner Fog's instruction tables + uops.info.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleZnver2.td
    llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index c0775847798d27..c3a0f2684a2989 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -994,9 +994,18 @@ def : InstRW<[Zn2WritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
 //-- Move instructions --//
 
 // VPERM2F128 / VPERM2I128.
-def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr,
+def Zn2WriteVPERM2r : SchedWriteRes<[Zn2FPU2]> {
+  let NumMicroOps = 1;
+  let Latency = 3;
+}
+def : InstRW<[Zn2WriteVPERM2r], (instrs VPERM2F128rr,
                                         VPERM2I128rr)>;
-def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm,
+
+def Zn2WriteVPERM2m : SchedWriteRes<[Zn2AGU, Zn2FPU2]> {
+  let NumMicroOps = 1;
+  let Latency = 8;
+}
+def : InstRW<[Zn2WriteVPERM2m], (instrs VPERM2F128rm,
                                         VPERM2I128rm)>;
 
 def Zn2WriteBROADCAST : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
@@ -1011,7 +1020,7 @@ def : InstRW<[Zn2WriteBROADCAST], (instrs VBROADCASTF128rm,
 // r32,x,i.
 def Zn2WriteEXTRACTPSr : SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
   let Latency = 2;
-  let NumMicroOps = 2; 
+  let NumMicroOps = 2;
   let ReleaseAtCycles = [1, 2];
 }
 def : InstRW<[Zn2WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
index 37e2cf4500c38a..96ae0575969671 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
@@ -1429,8 +1429,8 @@ vzeroupper
 # CHECK-NEXT:  1      100   0.25    *                   vpcmpistri	$1, (%rax), %xmm2
 # CHECK-NEXT:  1      100   0.25                        vpcmpistrm	$1, %xmm0, %xmm2
 # CHECK-NEXT:  1      100   0.25    *                   vpcmpistrm	$1, (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        vperm2f128	$1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vperm2f128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     1.00                        vperm2f128	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      8     1.00    *                   vperm2f128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vpermilpd	$1, %xmm0, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpermilpd	$1, (%rax), %xmm2
 # CHECK-NEXT:  1      3     0.50                        vpermilpd	%xmm0, %xmm1, %xmm2
@@ -1739,7 +1739,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]
-# CHECK-NEXT: 117.00 117.00 117.00 0.25   0.25   0.25   0.25    -     126.58 192.58 196.75 305.08  -
+# CHECK-NEXT: 117.33 117.33 117.33 0.25   0.25   0.25   0.25    -     126.58 192.58 198.75 305.08  -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   Instructions:
@@ -2142,8 +2142,8 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     vpcmpistri	$1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     vpcmpistrm	$1, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     vpcmpistrm	$1, (%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     vperm2f128	$1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     vperm2f128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -      -     vperm2f128	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -     1.00    -      -     vperm2f128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd	$1, %xmm0, %xmm2
 # CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd	$1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd	%xmm0, %xmm1, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s
index 33d18b0fa0fb55..7be45be2c3578e 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s
@@ -558,8 +558,8 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vpcmpgtq	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.33                        vpcmpgtw	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.33    *                   vpcmpgtw	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25                        vperm2i128	$1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vperm2i128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     1.00                        vperm2i128	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      8     1.00    *                   vperm2i128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      2     0.50                        vpermd	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      9     0.50    *                   vpermd	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      2     0.50                        vpermpd	$1, %ymm0, %ymm2
@@ -779,7 +779,7 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]
-# CHECK-NEXT: 44.33  44.33  44.33   -      -      -      -      -     63.17  125.67 71.00  34.17   -
+# CHECK-NEXT: 44.67  44.67  44.67   -      -      -      -      -     63.17  125.67 73.00  34.17   -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   Instructions:
@@ -881,8 +881,8 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -     0.50    -      -     0.50    -     vpcmpgtq	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.33   0.33    -     0.33    -     vpcmpgtw	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -     0.33   0.33    -     0.33    -     vpcmpgtw	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     vperm2i128	$1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     vperm2i128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -      -     vperm2i128	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -     1.00    -      -     vperm2i128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermd	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -     0.50   0.50    -      -     vpermd	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermpd	$1, %ymm0, %ymm2


        


More information about the llvm-commits mailing list