[llvm] cf6cd1f - [MCA][X86] Add missing 512-bit vpscatterqd/vscatterqps schedule data (REAPPLIED)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 23 02:33:37 PDT 2024


Author: Simon Pilgrim
Date: 2024-08-23T10:32:19+01:00
New Revision: cf6cd1fd67356ca0c2972992928592d2430043d2

URL: https://github.com/llvm/llvm-project/commit/cf6cd1fd67356ca0c2972992928592d2430043d2
DIFF: https://github.com/llvm/llvm-project/commit/cf6cd1fd67356ca0c2972992928592d2430043d2.diff

LOG: [MCA][X86] Add missing 512-bit vpscatterqd/vscatterqps schedule data (REAPPLIED)

This doesn't match uops.info yet - but it matches the existing vpscatterdq/vscatterqpd entries like uops.info says it should

Reapplied with codegen fix for scatter-schedule.ll

Fixes #105675

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86SchedIceLake.td
    llvm/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/test/CodeGen/X86/scatter-schedule.ll
    llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
    llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index fd372ba4656eba..29b1464e19a32b 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -1524,8 +1524,10 @@ def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort015
   let ReleaseAtCycles = [1,8,8,2];
 }
 def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
+                                           VPSCATTERQDZmr,
                                            VPSCATTERQQZmr,
                                            VSCATTERDPDZmr,
+                                           VSCATTERQPSZmr,
                                            VSCATTERQPDZmr)>;
 
 def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {

diff  --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 4fded44085e897..2423602d06c470 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -1499,8 +1499,10 @@ def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort015
   let ReleaseAtCycles = [1,8,8,2];
 }
 def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr,
+                                           VPSCATTERQDZmr,
                                            VPSCATTERQQZmr,
                                            VSCATTERDPDZmr,
+                                           VSCATTERQPSZmr,
                                            VSCATTERQPDZmr)>;
 
 def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {

diff  --git a/llvm/test/CodeGen/X86/scatter-schedule.ll b/llvm/test/CodeGen/X86/scatter-schedule.ll
index c841e23eab76b2..762a050247a87e 100644
--- a/llvm/test/CodeGen/X86/scatter-schedule.ll
+++ b/llvm/test/CodeGen/X86/scatter-schedule.ll
@@ -10,8 +10,8 @@ define void @test(i64 %x272, <16 x ptr> %x335, <16 x i32> %x270) {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
-; CHECK-NEXT:    kxnorw %k0, %k0, %k2
-; CHECK-NEXT:    vpscatterqd %ymm2, (,%zmm0) {%k2}
+; CHECK-NEXT:    vpscatterqd %ymm2, (,%zmm0) {%k1}
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vextracti64x4 $1, %zmm2, %ymm0
 ; CHECK-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
 ; CHECK-NEXT:    vzeroupper

diff  --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
index c4df992f3aebca..c509e766540b15 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
@@ -1804,7 +1804,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  36     8     8.00           *            vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  19     7     4.00           *            vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT:  1      1     0.50           *            vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  19     7     4.00           *            vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd	$0, (%rax), %zmm19
@@ -1871,7 +1871,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  36     7     8.00           *            vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  19     7     4.00           *            vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT:  1      1     0.50           *            vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  19     7     4.00           *            vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4	$0, (%rax), %zmm17, %zmm19
@@ -2054,7 +2054,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -     612.00 408.17 102.67 327.50 327.50 41.50  592.17 5.00   41.50  41.50  41.50
+# CHECK-NEXT:  -     612.00 411.17 103.67 327.50 327.50 48.50  593.17 6.00   48.50  48.50  48.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -2774,7 +2774,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.50   0.50    -      -     8.00   1.50   0.50   8.00   8.00   8.00   vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT:  -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50   vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpshufd	$0, (%rax), %zmm19
@@ -2841,7 +2841,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.50   0.50    -      -     8.00   1.50   0.50   8.00   8.00   8.00   vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT:  -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50   vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vshuff32x4	$0, (%rax), %zmm17, %zmm19

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
index 5eaa0f91fdaaba..9c006d4ebb077d 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
@@ -1804,7 +1804,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  36     8     16.00          *            vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  19     7     8.00           *            vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT:  1      1     1.00           *            vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  19     7     8.00           *            vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd	$0, (%rax), %zmm19
@@ -1871,7 +1871,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  36     7     16.00          *            vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  19     7     8.00           *            vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT:  1      1     1.00           *            vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  19     7     8.00           *            vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4	$0, (%rax), %zmm17, %zmm19
@@ -2052,7 +2052,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     612.00 349.67 102.67 355.17 355.17 83.00  650.67 5.00   27.67
+# CHECK-NEXT:  -     612.00 352.67 103.67 359.83 359.83 97.00  651.67 6.00   32.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2772,7 +2772,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.50   0.50   5.33   5.33   16.00  1.50   0.50   5.33   vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpshufd	$0, (%rax), %zmm19
@@ -2839,7 +2839,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.50   0.50   5.33   5.33   16.00  1.50   0.50   5.33   vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vshuff32x4	$0, (%rax), %zmm17, %zmm19


        


More information about the llvm-commits mailing list