[llvm-branch-commits] [llvm] release/19.x: [MCA][X86] Add missing 512-bit vpscatterqd/vscatterqps schedule data (REAPPLIED) (PR #105815)
Tobias Hieta via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Aug 26 00:20:02 PDT 2024
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/105815
>From 83aa83a911ef7f46d2cdfe4f304c121c4659cf4e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Thu, 1 Aug 2024 16:08:33 +0100
Subject: [PATCH 1/4] [llvm-mca][x86] Add test coverage for evex variant of vextractps
(cherry picked from commit 3276ee30226de1572bde5bd3716a1d343bbb3657)
---
llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s | 9 ++++++++-
.../tools/llvm-mca/X86/IceLakeServer/resources-avx512.s | 9 ++++++++-
.../tools/llvm-mca/X86/SapphireRapids/resources-avx512.s | 9 ++++++++-
.../tools/llvm-mca/X86/SkylakeServer/resources-avx512.s | 9 ++++++++-
llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s | 9 ++++++++-
5 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
index a8937f7dcfd117..1df586faa543d1 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
@@ -298,6 +298,9 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1}
vdivps (%rax), %zmm17, %zmm19 {z}{k1}
vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
vfmadd132pd %zmm16, %zmm17, %zmm19
vfmadd132pd (%rax), %zmm17, %zmm19
vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
@@ -1334,6 +1337,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 3 29 28.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 4 36 28.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 4 36 28.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 3 1.00 {evex} vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: 3 5 1.00 * {evex} vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 5 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19
# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
@@ -2027,7 +2032,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 1506.00 197.00 334.00 16.00 522.00 299.50 299.50
+# CHECK-NEXT: - 1506.00 198.00 335.00 17.00 523.00 300.00 300.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -2290,6 +2295,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - 28.00 2.50 - - 0.50 - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - {evex} vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: - - - 0.50 1.00 0.50 0.50 0.50 {evex} vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
index 5c12c520b04af2..1ff8eccf290a6f 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
@@ -298,6 +298,9 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1}
vdivps (%rax), %zmm17, %zmm19 {z}{k1}
vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
vfmadd132pd %zmm16, %zmm17, %zmm19
vfmadd132pd (%rax), %zmm17, %zmm19
vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
@@ -1334,6 +1337,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 3 18 10.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 4 25 10.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 4 25 10.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 3 1.00 {evex} vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: 3 2 1.00 * {evex} vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 11 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19
# CHECK-NEXT: 2 11 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
@@ -2031,7 +2036,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - 612.00 398.17 99.67 327.50 327.50 8.00 585.17 2.00 8.00 8.00 8.00
+# CHECK-NEXT: - 612.00 399.17 99.67 327.50 327.50 8.50 587.17 2.00 8.50 8.50 8.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -2294,6 +2299,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - 10.00 2.00 - - - - 1.00 - - - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - - - vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - {evex} vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: - - - - - - 0.50 1.00 - 0.50 0.50 0.50 {evex} vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132pd (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s
index b34ccaacc11a32..88f3313c70fde2 100644
--- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s
@@ -298,6 +298,9 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1}
vdivps (%rax), %zmm17, %zmm19 {z}{k1}
vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
vfmadd132pd %zmm16, %zmm17, %zmm19
vfmadd132pd (%rax), %zmm17, %zmm19
vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
@@ -1334,6 +1337,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 3 18 2.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 4 25 2.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 4 25 2.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 4 1.00 {evex} vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: 3 12 1.00 * {evex} vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 12 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19
# CHECK-NEXT: 2 12 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
@@ -2032,7 +2037,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
-# CHECK-NEXT: 490.00 12.00 218.33 218.33 8.00 575.00 - 8.00 8.00 8.00 - 218.33 -
+# CHECK-NEXT: 491.00 12.00 218.33 218.33 8.50 577.00 - 8.50 8.50 8.50 - 218.33 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@@ -2295,6 +2300,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - {evex} vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - {evex} vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
index b1bfd7a9ec448a..108ef75b0ac417 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
@@ -298,6 +298,9 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1}
vdivps (%rax), %zmm17, %zmm19 {z}{k1}
vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
vfmadd132pd %zmm16, %zmm17, %zmm19
vfmadd132pd (%rax), %zmm17, %zmm19
vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
@@ -1334,6 +1337,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 3 18 10.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 4 25 10.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 4 25 10.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 3 1.00 {evex} vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: 3 2 1.00 * {evex} vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 4 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19
# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
@@ -2029,7 +2034,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 612.00 339.67 99.67 332.83 332.83 16.00 643.67 2.00 5.33
+# CHECK-NEXT: - 612.00 340.67 99.67 333.17 333.17 17.00 645.67 2.00 5.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -2292,6 +2297,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - 10.00 2.00 - - - - 1.00 - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - - 1.00 - - {evex} vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 {evex} vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd132pd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132pd (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
index 6742cfccb2d001..51caeab1b3b7ca 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
@@ -298,6 +298,9 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1}
vdivps (%rax), %zmm17, %zmm19 {z}{k1}
vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
vfmadd132pd %zmm16, %zmm17, %zmm19
vfmadd132pd (%rax), %zmm17, %zmm19
vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
@@ -1334,6 +1337,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 11 6.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 18 6.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 18 6.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 1 1.00 {evex} vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: 2 2 1.00 * {evex} vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 11 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 11 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
@@ -2042,7 +2047,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 2.67 2.67 2.67 - - - - - 221.00 1060.50 618.00 352.50 295.50 295.50 16.00 199.67 199.67 199.67 194.33 194.33 194.33 8.00 8.00
+# CHECK-NEXT: 2.67 2.67 2.67 - - - - - 221.00 1060.50 618.00 352.50 297.00 297.00 17.00 200.00 200.00 200.00 194.33 194.33 194.33 8.50 8.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -2305,6 +2310,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - - - - - 6.00 - - - - - - - - - - - - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 6.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 6.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - {evex} vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 {evex} vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19
>From abb4af89ca31a2d02301f6d2ad98db61cc4a11a8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Thu, 22 Aug 2024 17:22:21 +0100
Subject: [PATCH 2/4] [MCA][X86] Add scatter instruction test coverage for #105675
(cherry picked from commit 6ec4c9c3eb4a556f848dac37a2d6f0d46ecc6f02)
---
.../llvm-mca/X86/Generic/resources-avx512.s | 28 +++++++++-
.../llvm-mca/X86/Generic/resources-avx512vl.s | 54 ++++++++++++++++++-
.../X86/SapphireRapids/resources-avx512.s | 28 +++++++++-
.../X86/SapphireRapids/resources-avx512vl.s | 54 ++++++++++++++++++-
.../X86/SkylakeServer/resources-avx512.s | 28 +++++++++-
.../X86/SkylakeServer/resources-avx512vl.s | 54 ++++++++++++++++++-
.../llvm-mca/X86/Znver4/resources-avx512.s | 28 +++++++++-
.../llvm-mca/X86/Znver4/resources-avx512vl.s | 54 ++++++++++++++++++-
8 files changed, 320 insertions(+), 8 deletions(-)
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
index 1df586faa543d1..c3453d890d76d5 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
@@ -814,6 +814,11 @@ vpermq %zmm16, %zmm17, %zmm19 {z}{k1}
vpermq (%rax), %zmm17, %zmm19 {z}{k1}
vpermq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
+
vpshufd $0, %zmm16, %zmm19
vpshufd $0, (%rax), %zmm19
vpshufd $0, (%rax){1to16}, %zmm19
@@ -884,6 +889,11 @@ vpunpcklqdq %zmm16, %zmm17, %zmm19 {z}{k1}
vpunpcklqdq (%rax), %zmm17, %zmm19 {z}{k1}
vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
+
vshuff32x4 $0, %zmm16, %zmm17, %zmm19
vshuff32x4 $0, (%rax), %zmm17, %zmm19
vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1792,6 +1802,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 1.00 vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax), %zmm19
# CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax){1to16}, %zmm19
@@ -1855,6 +1869,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 1.00 vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 1 1.00 * vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 8 1.00 * vshuff32x4 $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: 2 8 1.00 * vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
@@ -2032,7 +2050,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 1506.00 198.00 335.00 17.00 523.00 300.00 300.00
+# CHECK-NEXT: - 1506.00 198.00 335.00 25.00 523.00 304.00 304.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -2750,6 +2768,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - 1.00 - - vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - 1.00 - - vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpshufd $0, (%rax), %zmm19
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpshufd $0, (%rax){1to16}, %zmm19
@@ -2813,6 +2835,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - 1.00 - - vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - 1.00 - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vshuff32x4 $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
index e8e7a80f690bfa..4a4f77826437bd 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
@@ -1344,6 +1344,16 @@ vpmulld %ymm16, %ymm17, %ymm19 {z}{k1}
vpmulld (%rax), %ymm17, %ymm19 {z}{k1}
vpmulld (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
+
vpshufd $0, %xmm16, %xmm19
vpshufd $0, (%rax), %xmm19
vpshufd $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq %ymm16, %ymm17, %ymm19 {z}{k1}
vpunpckldq (%rax), %ymm17, %ymm19 {z}{k1}
vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
+
vshuff32x4 $0, %ymm16, %ymm17, %ymm19
vshuff32x4 $0, (%rax), %ymm17, %ymm19
vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 5 1.00 vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 12 1.00 * vpmulld (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 12 1.00 * vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: 1 1 0.50 vpshufd $0, %xmm16, %xmm19
# CHECK-NEXT: 2 7 0.50 * vpshufd $0, (%rax), %xmm19
# CHECK-NEXT: 2 7 0.50 * vpshufd $0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 1 1.00 vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1 1 1.00 * vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vshuff32x4 $0, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 2 8 1.00 * vshuff32x4 $0, (%rax), %ymm17, %ymm19
# CHECK-NEXT: 2 8 1.00 * vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
@@ -3228,7 +3264,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 1935.00 278.00 579.50 32.00 738.50 486.50 486.50
+# CHECK-NEXT: - 1935.00 278.00 579.50 48.00 738.50 494.50 494.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -4420,6 +4456,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - 1.00 - - - - - vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmulld (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpshufd $0, %xmm16, %xmm19
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpshufd $0, (%rax), %xmm19
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpshufd $0, (%rax){1to4}, %xmm19
@@ -4558,6 +4602,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - - - - 1.00 - - vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: - - - - - 1.00 - - vshuff32x4 $0, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vshuff32x4 $0, (%rax), %ymm17, %ymm19
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s
index 88f3313c70fde2..b2fde3929106a5 100644
--- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s
@@ -814,6 +814,11 @@ vpermq %zmm16, %zmm17, %zmm19 {z}{k1}
vpermq (%rax), %zmm17, %zmm19 {z}{k1}
vpermq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
+
vpshufd $0, %zmm16, %zmm19
vpshufd $0, (%rax), %zmm19
vpshufd $0, (%rax){1to16}, %zmm19
@@ -884,6 +889,11 @@ vpunpcklqdq %zmm16, %zmm17, %zmm19 {z}{k1}
vpunpcklqdq (%rax), %zmm17, %zmm19 {z}{k1}
vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
+
vshuff32x4 $0, %zmm16, %zmm17, %zmm19
vshuff32x4 $0, (%rax), %zmm17, %zmm19
vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1792,6 +1802,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 3 1.00 vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 11 1.00 * vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 11 1.00 * vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 35 19 8.00 * vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 12 4.00 * vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 19 12 4.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 12 4.00 * vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: 2 9 1.00 * vpshufd $0, (%rax), %zmm19
# CHECK-NEXT: 2 9 1.00 * vpshufd $0, (%rax){1to16}, %zmm19
@@ -1855,6 +1869,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 1.00 vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 9 1.00 * vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 9 1.00 * vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 35 19 8.00 * vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 12 4.00 * vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 19 12 4.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 12 4.00 * vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 11 1.00 * vshuff32x4 $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: 2 11 1.00 * vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
@@ -2037,7 +2055,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
-# CHECK-NEXT: 491.00 12.00 218.33 218.33 8.50 577.00 - 8.50 8.50 8.50 - 218.33 -
+# CHECK-NEXT: 508.60 13.60 218.33 218.33 48.50 578.60 1.60 48.50 48.50 48.50 1.60 218.33 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@@ -2755,6 +2773,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2.20 0.20 - - 8.00 0.20 0.20 8.00 8.00 8.00 0.20 - - vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax), %zmm19
# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax){1to16}, %zmm19
@@ -2818,6 +2840,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2.20 0.20 - - 8.00 0.20 0.20 8.00 8.00 8.00 0.20 - - vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s
index 3ad66f1c3d7128..d8c76832d38d3e 100644
--- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s
@@ -1344,6 +1344,16 @@ vpmulld %ymm16, %ymm17, %ymm19 {z}{k1}
vpmulld (%rax), %ymm17, %ymm19 {z}{k1}
vpmulld (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
+
vpshufd $0, %xmm16, %xmm19
vpshufd $0, (%rax), %xmm19
vpshufd $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq %ymm16, %ymm17, %ymm19 {z}{k1}
vpunpckldq (%rax), %ymm17, %ymm19 {z}{k1}
vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
+
vshuff32x4 $0, %ymm16, %ymm17, %ymm19
vshuff32x4 $0, (%rax), %ymm17, %ymm19
vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 2 10 1.00 vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 3 18 1.00 * vpmulld (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 3 18 1.00 * vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 11 12 2.00 * vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 12 1.00 * vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 12 1.00 * vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 12 1.00 * vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 19 12 4.00 * vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 12 2.00 * vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 11 12 2.00 * vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 12 2.00 * vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: 1 1 0.50 vpshufd $0, %xmm16, %xmm19
# CHECK-NEXT: 2 8 0.50 * vpshufd $0, (%rax), %xmm19
# CHECK-NEXT: 2 8 0.50 * vpshufd $0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 1 0.50 vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 9 0.50 * vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 9 0.50 * vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 11 12 2.00 * vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 12 1.00 * vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 12 1.00 * vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 12 1.00 * vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 19 12 4.00 * vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 12 2.00 * vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 11 12 2.00 * vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 12 2.00 * vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 2 11 1.00 * vshuff32x4 $0, (%rax), %ymm17, %ymm19
# CHECK-NEXT: 2 11 1.00 * vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
@@ -3233,7 +3269,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
-# CHECK-NEXT: 377.33 401.33 328.33 328.33 16.00 794.33 - 16.00 16.00 16.00 - 328.33 -
+# CHECK-NEXT: 404.53 412.53 328.33 328.33 46.00 797.53 3.20 46.00 46.00 46.00 3.20 328.33 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@@ -4425,6 +4461,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %xmm16, %xmm19
# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %xmm19
# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to4}, %xmm19
@@ -4563,6 +4607,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %ymm17, %ymm19
# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
index 108ef75b0ac417..5eaa0f91fdaaba 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
@@ -814,6 +814,11 @@ vpermq %zmm16, %zmm17, %zmm19 {z}{k1}
vpermq (%rax), %zmm17, %zmm19 {z}{k1}
vpermq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
+
vpshufd $0, %zmm16, %zmm19
vpshufd $0, (%rax), %zmm19
vpshufd $0, (%rax){1to16}, %zmm19
@@ -884,6 +889,11 @@ vpunpcklqdq %zmm16, %zmm17, %zmm19 {z}{k1}
vpunpcklqdq (%rax), %zmm17, %zmm19 {z}{k1}
vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
+
vshuff32x4 $0, %zmm16, %zmm17, %zmm19
vshuff32x4 $0, (%rax), %zmm17, %zmm19
vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1792,6 +1802,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 3 1.00 vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 10 1.00 * vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 10 1.00 * vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 36 8 16.00 * vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 8.00 * vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 8.00 * vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax), %zmm19
# CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax){1to16}, %zmm19
@@ -1855,6 +1869,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 1.00 vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 36 7 16.00 * vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 8.00 * vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 8.00 * vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
@@ -2034,7 +2052,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 612.00 340.67 99.67 333.17 333.17 17.00 645.67 2.00 5.67
+# CHECK-NEXT: - 612.00 349.67 102.67 355.17 355.17 83.00 650.67 5.00 27.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -2752,6 +2770,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.50 0.50 5.33 5.33 16.00 1.50 0.50 5.33 vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpshufd $0, (%rax), %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpshufd $0, (%rax){1to16}, %zmm19
@@ -2815,6 +2837,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.50 0.50 5.33 5.33 16.00 1.50 0.50 5.33 vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vshuff32x4 $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
index 2ad91ea514aa20..b4b18101a67b80 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
@@ -1344,6 +1344,16 @@ vpmulld %ymm16, %ymm17, %ymm19 {z}{k1}
vpmulld (%rax), %ymm17, %ymm19 {z}{k1}
vpmulld (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
+
vpshufd $0, %xmm16, %xmm19
vpshufd $0, (%rax), %xmm19
vpshufd $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq %ymm16, %ymm17, %ymm19 {z}{k1}
vpunpckldq (%rax), %ymm17, %ymm19 {z}{k1}
vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
+
vshuff32x4 $0, %ymm16, %ymm17, %ymm19
vshuff32x4 $0, (%rax), %ymm17, %ymm19
vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 2 10 1.00 vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 3 17 1.00 * vpmulld (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 3 17 1.00 * vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 12 8 4.00 * vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 7 2.00 * vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 8 8 2.00 * vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 7 2.00 * vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 20 8 8.00 * vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 7 4.00 * vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 8 8 2.00 * vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 7 4.00 * vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vpshufd $0, %xmm16, %xmm19
# CHECK-NEXT: 2 7 1.00 * vpshufd $0, (%rax), %xmm19
# CHECK-NEXT: 2 7 1.00 * vpshufd $0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 1 1.00 vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 12 8 4.00 * vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 7 2.00 * vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 8 8 2.00 * vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 7 2.00 * vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 20 8 8.00 * vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 7 4.00 * vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 8 8 2.00 * vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 7 4.00 * vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax), %ymm17, %ymm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
@@ -3230,7 +3266,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 423.00 438.33 350.33 503.17 503.17 32.00 785.33 4.00 10.67
+# CHECK-NEXT: - 423.00 462.33 358.33 521.83 521.83 88.00 801.33 12.00 29.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -4422,6 +4458,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: - - 1.50 0.50 1.33 1.33 4.00 1.50 0.50 1.33 vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 0.67 0.67 2.00 0.50 0.50 0.67 vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 0.67 0.67 2.00 1.50 0.50 0.67 vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 0.67 0.67 2.00 0.50 0.50 0.67 vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 1.50 0.50 2.67 vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 1.33 1.33 4.00 0.50 0.50 1.33 vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 0.67 0.67 2.00 1.50 0.50 0.67 vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 1.33 1.33 4.00 0.50 0.50 1.33 vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vpshufd $0, %xmm16, %xmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpshufd $0, (%rax), %xmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpshufd $0, (%rax){1to4}, %xmm19
@@ -4560,6 +4604,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: - - 1.50 0.50 1.33 1.33 4.00 1.50 0.50 1.33 vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 0.67 0.67 2.00 0.50 0.50 0.67 vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 0.67 0.67 2.00 1.50 0.50 0.67 vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 0.67 0.67 2.00 0.50 0.50 0.67 vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 1.50 0.50 2.67 vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 1.33 1.33 4.00 0.50 0.50 1.33 vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 0.67 0.67 2.00 1.50 0.50 0.67 vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 1.33 1.33 4.00 0.50 0.50 1.33 vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vshuff32x4 $0, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vshuff32x4 $0, (%rax), %ymm17, %ymm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
index 51caeab1b3b7ca..6e52eddd9a8f5e 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
@@ -814,6 +814,11 @@ vpermq %zmm16, %zmm17, %zmm19 {z}{k1}
vpermq (%rax), %zmm17, %zmm19 {z}{k1}
vpermq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
+
vpshufd $0, %zmm16, %zmm19
vpshufd $0, (%rax), %zmm19
vpshufd $0, (%rax){1to16}, %zmm19
@@ -884,6 +889,11 @@ vpunpcklqdq %zmm16, %zmm17, %zmm19 {z}{k1}
vpunpcklqdq (%rax), %zmm17, %zmm19 {z}{k1}
vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
+
vshuff32x4 $0, %zmm16, %zmm17, %zmm19
vshuff32x4 $0, (%rax), %zmm17, %zmm19
vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1792,6 +1802,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 1 0.50 vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: 1 8 1.00 * vpshufd $0, (%rax), %zmm19
# CHECK-NEXT: 1 8 1.00 * vpshufd $0, (%rax){1to16}, %zmm19
@@ -1855,6 +1869,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 1.00 vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 1.00 * vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 1.00 * vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 1 1.00 * vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 2 1.00 vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 3 9 1.00 * vshuff32x4 $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: 3 9 1.00 * vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
@@ -2047,7 +2065,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 2.67 2.67 2.67 - - - - - 221.00 1060.50 618.00 352.50 297.00 297.00 17.00 200.00 200.00 200.00 194.33 194.33 194.33 8.50 8.50
+# CHECK-NEXT: 5.33 5.33 5.33 - - - - - 221.00 1060.50 618.00 352.50 297.00 297.00 17.00 205.33 205.33 205.33 194.33 194.33 194.33 16.50 16.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -2765,6 +2783,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufd $0, (%rax), %zmm19
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufd $0, (%rax){1to16}, %zmm19
@@ -2828,6 +2850,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vshuff32x4 $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s
index 2d26eb50351a08..4636e23d9df3e2 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s
@@ -1344,6 +1344,16 @@ vpmulld %ymm16, %ymm17, %ymm19 {z}{k1}
vpmulld (%rax), %ymm17, %ymm19 {z}{k1}
vpmulld (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
+
vpshufd $0, %xmm16, %xmm19
vpshufd $0, (%rax), %xmm19
vpshufd $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq %ymm16, %ymm17, %ymm19 {z}{k1}
vpunpckldq (%rax), %ymm17, %ymm19 {z}{k1}
vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
+
vshuff32x4 $0, %ymm16, %ymm17, %ymm19
vshuff32x4 $0, (%rax), %ymm17, %ymm19
vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 3 0.50 vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vpmulld (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: 1 1 0.50 vpshufd $0, %xmm16, %xmm19
# CHECK-NEXT: 1 8 0.50 * vpshufd $0, (%rax), %xmm19
# CHECK-NEXT: 1 8 0.50 * vpshufd $0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 1 0.50 vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1 1 1.00 * vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 1.00 * vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: 1 2 1.00 vshuff32x4 $0, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 3 9 1.00 * vshuff32x4 $0, (%rax), %ymm17, %ymm19
# CHECK-NEXT: 3 9 1.00 * vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
@@ -3243,7 +3279,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 5.33 5.33 5.33 - - - - - 208.00 948.00 501.50 261.50 478.50 478.50 32.00 324.33 324.33 324.33 313.67 313.67 313.67 16.00 16.00
+# CHECK-NEXT: 10.67 10.67 10.67 - - - - - 208.00 948.00 501.50 261.50 478.50 478.50 32.00 335.00 335.00 335.00 313.67 313.67 313.67 32.00 32.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -4435,6 +4471,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulld (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpshufd $0, %xmm16, %xmm19
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufd $0, (%rax), %xmm19
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufd $0, (%rax){1to4}, %xmm19
@@ -4573,6 +4617,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vshuff32x4 $0, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vshuff32x4 $0, (%rax), %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
>From 015936c16039e61a3ac8bd919532cc84a38c662c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Thu, 22 Aug 2024 18:07:44 +0100
Subject: [PATCH 3/4] [MCA][X86] Add scatter instruction test coverage for
#105675
IceLakeServer was missed when the other CPUs were updated in 6ec4c9c3eb4a556f848dac37a2d6f0d46ecc6f02.
(cherry picked from commit 7faf2c95a4f1c3148c891608ed516eda3c9d3eb4)
---
.../X86/IceLakeServer/resources-avx512.s | 28 +++++++++-
.../X86/IceLakeServer/resources-avx512vl.s | 54 ++++++++++++++++++-
2 files changed, 80 insertions(+), 2 deletions(-)
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
index 1ff8eccf290a6f..c4df992f3aebca 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
@@ -814,6 +814,11 @@ vpermq %zmm16, %zmm17, %zmm19 {z}{k1}
vpermq (%rax), %zmm17, %zmm19 {z}{k1}
vpermq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
+
vpshufd $0, %zmm16, %zmm19
vpshufd $0, (%rax), %zmm19
vpshufd $0, (%rax){1to16}, %zmm19
@@ -884,6 +889,11 @@ vpunpcklqdq %zmm16, %zmm17, %zmm19 {z}{k1}
vpunpcklqdq (%rax), %zmm17, %zmm19 {z}{k1}
vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
+
vshuff32x4 $0, %zmm16, %zmm17, %zmm19
vshuff32x4 $0, (%rax), %zmm17, %zmm19
vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1792,6 +1802,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 3 1.00 vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 10 1.00 * vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 10 1.00 * vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 36 8 8.00 * vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 4.00 * vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 0.50 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 4.00 * vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax), %zmm19
# CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax){1to16}, %zmm19
@@ -1855,6 +1869,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 1.00 vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 36 7 8.00 * vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 4.00 * vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1 1 0.50 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 4.00 * vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
@@ -2036,7 +2054,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - 612.00 399.17 99.67 327.50 327.50 8.50 587.17 2.00 8.50 8.50 8.50
+# CHECK-NEXT: - 612.00 408.17 102.67 327.50 327.50 41.50 592.17 5.00 41.50 41.50 41.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -2754,6 +2772,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.50 0.50 - - 8.00 1.50 0.50 8.00 8.00 8.00 vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - - - - - 0.50 - - 0.50 0.50 0.50 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpshufd $0, (%rax), %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpshufd $0, (%rax){1to16}, %zmm19
@@ -2817,6 +2839,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.50 0.50 - - 8.00 1.50 0.50 8.00 8.00 8.00 vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - - - - - 0.50 - - 0.50 0.50 0.50 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vshuff32x4 $0, (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s
index 375087ae0cfe4e..00e5c3b03f6f52 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s
@@ -1344,6 +1344,16 @@ vpmulld %ymm16, %ymm17, %ymm19 {z}{k1}
vpmulld (%rax), %ymm17, %ymm19 {z}{k1}
vpmulld (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
+
vpshufd $0, %xmm16, %xmm19
vpshufd $0, (%rax), %xmm19
vpshufd $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq %ymm16, %ymm17, %ymm19 {z}{k1}
vpunpckldq (%rax), %ymm17, %ymm19 {z}{k1}
vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
+
vshuff32x4 $0, %ymm16, %ymm17, %ymm19
vshuff32x4 $0, (%rax), %ymm17, %ymm19
vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 2 10 1.00 vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 3 17 1.00 * vpmulld (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 3 17 1.00 * vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 12 8 2.00 * vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 7 1.00 * vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 8 8 1.00 * vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 7 1.00 * vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 20 8 4.00 * vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 7 2.00 * vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 8 8 1.00 * vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 7 2.00 * vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: 1 1 0.50 vpshufd $0, %xmm16, %xmm19
# CHECK-NEXT: 2 7 0.50 * vpshufd $0, (%rax), %xmm19
# CHECK-NEXT: 2 7 0.50 * vpshufd $0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 1 0.50 vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 12 8 2.00 * vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 7 1.00 * vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 8 8 1.00 * vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 7 7 1.00 * vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 20 8 4.00 * vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 7 2.00 * vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 8 8 1.00 * vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 11 7 2.00 * vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax), %ymm17, %ymm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
@@ -3232,7 +3268,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - 423.00 438.33 413.33 492.50 492.50 16.00 722.33 4.00 16.00 16.00 16.00
+# CHECK-NEXT: - 423.00 462.33 421.33 492.50 492.50 44.00 738.33 12.00 44.00 44.00 44.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -4424,6 +4460,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - 1.00 1.00 - - - - - - - - vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - vpmulld (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: - - 1.50 0.50 - - 2.00 1.50 0.50 2.00 2.00 2.00 vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 1.00 0.50 0.50 1.00 1.00 1.00 vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 1.00 1.50 0.50 1.00 1.00 1.00 vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 1.00 0.50 0.50 1.00 1.00 1.00 vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 4.00 1.50 0.50 4.00 4.00 4.00 vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 2.00 0.50 0.50 2.00 2.00 2.00 vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 1.00 1.50 0.50 1.00 1.00 1.00 vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 2.00 0.50 0.50 2.00 2.00 2.00 vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vpshufd $0, %xmm16, %xmm19
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vpshufd $0, (%rax), %xmm19
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vpshufd $0, (%rax){1to4}, %xmm19
@@ -4562,6 +4606,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: - - 1.50 0.50 - - 2.00 1.50 0.50 2.00 2.00 2.00 vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 1.00 0.50 0.50 1.00 1.00 1.00 vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 1.00 1.50 0.50 1.00 1.00 1.00 vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 1.00 0.50 0.50 1.00 1.00 1.00 vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 4.00 1.50 0.50 4.00 4.00 4.00 vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 2.00 0.50 0.50 2.00 2.00 2.00 vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 1.00 1.50 0.50 1.00 1.00 1.00 vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 2.00 0.50 0.50 2.00 2.00 2.00 vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vshuff32x4 $0, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vshuff32x4 $0, (%rax), %ymm17, %ymm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
>From b65409f9e285491e869a34001493d8dabe5d270a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 23 Aug 2024 10:32:08 +0100
Subject: [PATCH 4/4] [MCA][X86] Add missing 512-bit vpscatterqd/vscatterqps
schedule data (REAPPLIED)
This doesn't match uops.info yet, but it now matches the existing vpscatterdq/vscatterqpd entries, as uops.info indicates it should.
Reapplied with codegen fix for scatter-schedule.ll
Fixes #105675
(cherry picked from commit cf6cd1fd67356ca0c2972992928592d2430043d2)
---
llvm/lib/Target/X86/X86SchedIceLake.td | 2 ++
llvm/lib/Target/X86/X86SchedSkylakeServer.td | 2 ++
llvm/test/CodeGen/X86/scatter-schedule.ll | 4 ++--
.../llvm-mca/X86/IceLakeServer/resources-avx512.s | 10 +++++-----
.../llvm-mca/X86/SkylakeServer/resources-avx512.s | 10 +++++-----
5 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 186d4d84c25104..b68be9be6d4731 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -1510,8 +1510,10 @@ def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort015
let ReleaseAtCycles = [1,8,8,2];
}
def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
+ VPSCATTERQDZmr,
VPSCATTERQQZmr,
VSCATTERDPDZmr,
+ VSCATTERQPSZmr,
VSCATTERQPDZmr)>;
def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 4fded44085e897..2423602d06c470 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -1499,8 +1499,10 @@ def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort015
let ReleaseAtCycles = [1,8,8,2];
}
def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr,
+ VPSCATTERQDZmr,
VPSCATTERQQZmr,
VSCATTERDPDZmr,
+ VSCATTERQPSZmr,
VSCATTERQPDZmr)>;
def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
diff --git a/llvm/test/CodeGen/X86/scatter-schedule.ll b/llvm/test/CodeGen/X86/scatter-schedule.ll
index c841e23eab76b2..762a050247a87e 100644
--- a/llvm/test/CodeGen/X86/scatter-schedule.ll
+++ b/llvm/test/CodeGen/X86/scatter-schedule.ll
@@ -10,8 +10,8 @@ define void @test(i64 %x272, <16 x ptr> %x335, <16 x i32> %x270) {
; CHECK-LABEL: test:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
-; CHECK-NEXT: kxnorw %k0, %k0, %k2
-; CHECK-NEXT: vpscatterqd %ymm2, (,%zmm0) {%k2}
+; CHECK-NEXT: vpscatterqd %ymm2, (,%zmm0) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; CHECK-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; CHECK-NEXT: vzeroupper
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
index c4df992f3aebca..c509e766540b15 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
@@ -1804,7 +1804,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 10 1.00 * vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 36 8 8.00 * vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 4.00 * vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: 1 1 0.50 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 4.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 4.00 * vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax), %zmm19
@@ -1871,7 +1871,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 36 7 8.00 * vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 4.00 * vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: 1 1 0.50 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 4.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 4.00 * vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax), %zmm17, %zmm19
@@ -2054,7 +2054,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - 612.00 408.17 102.67 327.50 327.50 41.50 592.17 5.00 41.50 41.50 41.50
+# CHECK-NEXT: - 612.00 411.17 103.67 327.50 327.50 48.50 593.17 6.00 48.50 48.50 48.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -2774,7 +2774,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 1.50 0.50 - - 8.00 1.50 0.50 8.00 8.00 8.00 vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: - - - - - - 0.50 - - 0.50 0.50 0.50 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpshufd $0, (%rax), %zmm19
@@ -2841,7 +2841,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 1.50 0.50 - - 8.00 1.50 0.50 8.00 8.00 8.00 vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: - - - - - - 0.50 - - 0.50 0.50 0.50 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vshuff32x4 $0, (%rax), %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
index 5eaa0f91fdaaba..9c006d4ebb077d 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
@@ -1804,7 +1804,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 10 1.00 * vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 36 8 16.00 * vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 8.00 * vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: 1 1 1.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 8.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 8.00 * vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax), %zmm19
@@ -1871,7 +1871,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 36 7 16.00 * vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 8.00 * vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: 1 1 1.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 8.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 8.00 * vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax), %zmm17, %zmm19
@@ -2052,7 +2052,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 612.00 349.67 102.67 355.17 355.17 83.00 650.67 5.00 27.67
+# CHECK-NEXT: - 612.00 352.67 103.67 359.83 359.83 97.00 651.67 6.00 32.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -2772,7 +2772,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 1.50 0.50 5.33 5.33 16.00 1.50 0.50 5.33 vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpshufd $0, (%rax), %zmm19
@@ -2839,7 +2839,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 1.50 0.50 5.33 5.33 16.00 1.50 0.50 5.33 vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vshuff32x4 $0, (%rax), %zmm17, %zmm19
More information about the llvm-branch-commits mailing list