[llvm] [AArch64] Correct Neoverse V1 SVE 16-bit sdot/udot schedule pipelines. (PR #86142)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 21 09:00:07 PDT 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/86142
Fixes #86102
From e4185b9558989ef6d1080f96187fdff4db8a8049 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 21 Mar 2024 15:56:47 +0000
Subject: [PATCH] [AArch64] Correct Neoverse V1 SVE 16-bit sdot/udot schedule
pipelines.
Fixes #86102
---
.../Target/AArch64/AArch64SchedNeoverseV1.td | 2 +-
.../AArch64/Neoverse/V1-sve-instructions.s | 18 +++++++++---------
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index c7dfd64b2fb24e..f7e6545f0dd386 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -1446,7 +1446,7 @@ def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>;
def : InstRW<[V1Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;
// Dot product, 16 bit
-def : InstRW<[V1Write_4c_1V01], (instregex "^[SU]DOT_ZZZI?_D$")>;
+def : InstRW<[V1Write_4c_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;
// Duplicate, immediate and indexed form
def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$",
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
index efa81b0ffcd48b..f120f5feaf7ced 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
@@ -4093,8 +4093,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 12 7.00 sdiv z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 20 7.00 sdivr z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 12 7.00 sdivr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 4 0.50 sdot z0.d, z1.h, z15.h[1]
-# CHECK-NEXT: 1 4 0.50 sdot z0.d, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 sdot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: 1 4 1.00 sdot z0.d, z1.h, z31.h
# CHECK-NEXT: 1 3 0.50 sdot z0.s, z1.b, z31.b
# CHECK-NEXT: 1 3 0.50 sdot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: 1 2 0.50 sel z23.b, p11, z13.b, z8.b
@@ -4569,8 +4569,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 12 7.00 udiv z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 20 7.00 udivr z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 12 7.00 udivr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 4 0.50 udot z0.d, z1.h, z15.h[1]
-# CHECK-NEXT: 1 4 0.50 udot z0.d, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 udot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: 1 4 1.00 udot z0.d, z1.h, z31.h
# CHECK-NEXT: 1 3 0.50 udot z0.s, z1.b, z31.b
# CHECK-NEXT: 1 3 0.50 udot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: 1 2 0.50 umax z0.b, z0.b, #0
@@ -4839,7 +4839,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10]
-# CHECK-NEXT: - - - - 88.67 500.67 500.67 797.50 2.50 92.50 92.50 1250.00 923.00 178.50 181.50
+# CHECK-NEXT: - - - - 88.67 500.67 500.67 797.50 2.50 92.50 92.50 1252.00 921.00 178.50 181.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10] Instructions:
@@ -6521,8 +6521,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - 7.00 - - - sdiv z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - 7.00 - - - sdivr z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - 7.00 - - - sdivr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - - sdot z0.d, z1.h, z15.h[1]
-# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - - sdot z0.d, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - sdot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - sdot z0.d, z1.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - - sdot z0.s, z1.b, z31.b
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - - sdot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - - sel z23.b, p11, z13.b, z8.b
@@ -6997,8 +6997,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - 7.00 - - - udiv z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - 7.00 - - - udivr z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - 7.00 - - - udivr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - - udot z0.d, z1.h, z15.h[1]
-# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - - udot z0.d, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - udot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - udot z0.d, z1.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - - udot z0.s, z1.b, z31.b
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - - udot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 - - umax z0.b, z0.b, #0
More information about the llvm-commits
mailing list