[llvm] 314e431 - [AArch64] Fix N2 SchedModel element-to-element INS latencies
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 22 03:21:08 PST 2023
Author: Sjoerd Meijer
Date: 2023-02-22T10:55:55Z
New Revision: 314e431406de6e1518836f304a0a0e469c670a71
URL: https://github.com/llvm/llvm-project/commit/314e431406de6e1518836f304a0a0e469c670a71
DIFF: https://github.com/llvm/llvm-project/commit/314e431406de6e1518836f304a0a0e469c670a71.diff
LOG: [AArch64] Fix N2 SchedModel element-to-element INS latencies
The instruction regexp "^INSv", intended for the gen-reg-to-element insert,
also matched the element-to-element INS instructions. Those have a latency of
only 2 cycles, not 5, so the model was assigning them the wrong latency.
Differential Revision: https://reviews.llvm.org/D144508
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index 21a0e927d7567..d9d5c2be03092 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -1210,7 +1210,7 @@ def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>;
def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
// ASIMD transfer, gen reg to element
-def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSv")>;
+def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
// ASIMD load instructions
// -----------------------------------------------------------------------------
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
index 7031a60f99587..13c1403f356ee 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
@@ -360,6 +360,14 @@ mov b0, v0.b[15]
mov d6, v0.d[1]
mov h2, v0.h[5]
mov s17, v0.s[2]
+mov v2.b[0], v0.b[0]
+mov v2.h[1], v0.h[1]
+mov v2.s[2], v0.s[2]
+mov v2.d[1], v0.d[1]
+mov v0.b[0], w8
+mov v0.h[1], w8
+mov v0.s[2], w8
+mov v0.d[1], x8
mov v0.16b, v0.16b
mov v0.8b, v0.8b
movi d15, #0xff00ff00ff00ff
@@ -1429,6 +1437,14 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.50 mov d6, v0.d[1]
# CHECK-NEXT: 1 2 0.50 mov h2, v0.h[5]
# CHECK-NEXT: 1 2 0.50 mov s17, v0.s[2]
+# CHECK-NEXT: 1 2 0.50 mov v2.b[0], v0.b[0]
+# CHECK-NEXT: 1 2 0.50 mov v2.h[1], v0.h[1]
+# CHECK-NEXT: 1 2 0.50 mov v2.s[2], v0.s[2]
+# CHECK-NEXT: 1 2 0.50 mov v2.d[1], v0.d[1]
+# CHECK-NEXT: 2 5 1.00 mov v0.b[0], w8
+# CHECK-NEXT: 2 5 1.00 mov v0.h[1], w8
+# CHECK-NEXT: 2 5 1.00 mov v0.s[2], w8
+# CHECK-NEXT: 2 5 1.00 mov v0.d[1], x8
# CHECK-NEXT: 1 2 0.50 mov v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.50 mov v0.8b, v0.8b
# CHECK-NEXT: 1 2 0.50 movi d15, #0xff00ff00ff00ff
@@ -2147,7 +2163,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8]
-# CHECK-NEXT: - - - - 26.67 52.17 52.17 28.75 7.75 7.75 7.75 708.00 617.00
+# CHECK-NEXT: - - - - 26.67 52.17 52.17 32.75 7.75 7.75 7.75 712.00 621.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions:
@@ -2510,6 +2526,14 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov d6, v0.d[1]
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov h2, v0.h[5]
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov s17, v0.s[2]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v2.b[0], v0.b[0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v2.h[1], v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v2.s[2], v0.s[2]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v2.d[1], v0.d[1]
+# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 mov v0.b[0], w8
+# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 mov v0.h[1], w8
+# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 mov v0.s[2], w8
+# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 mov v0.d[1], x8
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v0.16b, v0.16b
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v0.8b, v0.8b
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 movi d15, #0xff00ff00ff00ff
More information about the llvm-commits mailing list