[llvm] cf6cd1f - [MCA][X86] Add missing 512-bit vpscatterqd/vscatterqps schedule data (REAPPLIED)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 23 02:33:37 PDT 2024
Author: Simon Pilgrim
Date: 2024-08-23T10:32:19+01:00
New Revision: cf6cd1fd67356ca0c2972992928592d2430043d2
URL: https://github.com/llvm/llvm-project/commit/cf6cd1fd67356ca0c2972992928592d2430043d2
DIFF: https://github.com/llvm/llvm-project/commit/cf6cd1fd67356ca0c2972992928592d2430043d2.diff
LOG: [MCA][X86] Add missing 512-bit vpscatterqd/vscatterqps schedule data (REAPPLIED)
This doesn't match uops.info yet - but it matches the existing vpscatterdq/vscatterqpd entries like uops.info says it should
Reapplied with codegen fix for scatter-schedule.ll
Fixes #105675
Added:
Modified:
llvm/lib/Target/X86/X86SchedIceLake.td
llvm/lib/Target/X86/X86SchedSkylakeServer.td
llvm/test/CodeGen/X86/scatter-schedule.ll
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index fd372ba4656eba..29b1464e19a32b 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -1524,8 +1524,10 @@ def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort015
let ReleaseAtCycles = [1,8,8,2];
}
def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
+ VPSCATTERQDZmr,
VPSCATTERQQZmr,
VSCATTERDPDZmr,
+ VSCATTERQPSZmr,
VSCATTERQPDZmr)>;
def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 4fded44085e897..2423602d06c470 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -1499,8 +1499,10 @@ def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort015
let ReleaseAtCycles = [1,8,8,2];
}
def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr,
+ VPSCATTERQDZmr,
VPSCATTERQQZmr,
VSCATTERDPDZmr,
+ VSCATTERQPSZmr,
VSCATTERQPDZmr)>;
def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
diff --git a/llvm/test/CodeGen/X86/scatter-schedule.ll b/llvm/test/CodeGen/X86/scatter-schedule.ll
index c841e23eab76b2..762a050247a87e 100644
--- a/llvm/test/CodeGen/X86/scatter-schedule.ll
+++ b/llvm/test/CodeGen/X86/scatter-schedule.ll
@@ -10,8 +10,8 @@ define void @test(i64 %x272, <16 x ptr> %x335, <16 x i32> %x270) {
; CHECK-LABEL: test:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
-; CHECK-NEXT: kxnorw %k0, %k0, %k2
-; CHECK-NEXT: vpscatterqd %ymm2, (,%zmm0) {%k2}
+; CHECK-NEXT: vpscatterqd %ymm2, (,%zmm0) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; CHECK-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; CHECK-NEXT: vzeroupper
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
index c4df992f3aebca..c509e766540b15 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
@@ -1804,7 +1804,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 10 1.00 * vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 36 8 8.00 * vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 4.00 * vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: 1 1 0.50 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 4.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 4.00 * vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax), %zmm19
@@ -1871,7 +1871,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 36 7 8.00 * vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 4.00 * vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: 1 1 0.50 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 4.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 4.00 * vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax), %zmm17, %zmm19
@@ -2054,7 +2054,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - 612.00 408.17 102.67 327.50 327.50 41.50 592.17 5.00 41.50 41.50 41.50
+# CHECK-NEXT: - 612.00 411.17 103.67 327.50 327.50 48.50 593.17 6.00 48.50 48.50 48.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -2774,7 +2774,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 1.50 0.50 - - 8.00 1.50 0.50 8.00 8.00 8.00 vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: - - - - - - 0.50 - - 0.50 0.50 0.50 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpshufd $0, (%rax), %zmm19
@@ -2841,7 +2841,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 1.50 0.50 - - 8.00 1.50 0.50 8.00 8.00 8.00 vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: - - - - - - 0.50 - - 0.50 0.50 0.50 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vshuff32x4 $0, (%rax), %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
index 5eaa0f91fdaaba..9c006d4ebb077d 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
@@ -1804,7 +1804,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 10 1.00 * vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 36 8 16.00 * vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 8.00 * vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: 1 1 1.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 8.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 8.00 * vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax), %zmm19
@@ -1871,7 +1871,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 36 7 16.00 * vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 8.00 * vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: 1 1 1.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 19 7 8.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 19 7 8.00 * vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax), %zmm17, %zmm19
@@ -2052,7 +2052,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 612.00 349.67 102.67 355.17 355.17 83.00 650.67 5.00 27.67
+# CHECK-NEXT: - 612.00 352.67 103.67 359.83 359.83 97.00 651.67 6.00 32.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -2772,7 +2772,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 1.50 0.50 5.33 5.33 16.00 1.50 0.50 5.33 vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vpshufd $0, %zmm16, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpshufd $0, (%rax), %zmm19
@@ -2839,7 +2839,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 1.50 0.50 5.33 5.33 16.00 1.50 0.50 5.33 vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
-# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vshuff32x4 $0, (%rax), %zmm17, %zmm19
More information about the llvm-commits
mailing list