[PATCH] D144508: [AArch64] Fix N2 SchedModel INS instruction latencies
Sjoerd Meijer via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 21 11:06:32 PST 2023
SjoerdMeijer created this revision.
SjoerdMeijer added reviewers: c-rhodes, dmgreen, paulwalker-arm, peterwaller-arm.
Herald added subscribers: gbedwell, hiraditya, kristof.beyls.
Herald added a reviewer: andreadb.
Herald added a project: All.
SjoerdMeijer requested review of this revision.
Herald added a project: LLVM.
The instruction regexp "^INSv", intended for the insert gen-reg-to-element instruction, was also matching the element-to-element insert instruction. According to the Software Optimization Guide [1], the element-to-element form has a latency of 2 cycles, not 5, so the scheduling model was reporting the wrong latency for it.
I haven't done any performance runs with this change, both because I don't have access to N2 hardware and because the fix is hopefully obvious enough. My use case for this is llvm-mca, which currently reports incorrect results for these instructions because of this mismatch.
[1] https://developer.arm.com/documentation/PJDOC-466751330-18256/latest/
https://reviews.llvm.org/D144508
Files:
llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
Index: llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
===================================================================
--- llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
+++ llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
@@ -360,6 +360,8 @@
mov d6, v0.d[1]
mov h2, v0.h[5]
mov s17, v0.s[2]
+mov v2.s[1], v0.s[2]
+mov v0.s[3], w8
mov v0.16b, v0.16b
mov v0.8b, v0.8b
movi d15, #0xff00ff00ff00ff
@@ -1429,6 +1431,8 @@
# CHECK-NEXT: 1 2 0.50 mov d6, v0.d[1]
# CHECK-NEXT: 1 2 0.50 mov h2, v0.h[5]
# CHECK-NEXT: 1 2 0.50 mov s17, v0.s[2]
+# CHECK-NEXT: 1 2 0.50 mov v2.s[1], v0.s[2]
+# CHECK-NEXT: 2 5 1.00 mov v0.s[3], w8
# CHECK-NEXT: 1 2 0.50 mov v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.50 mov v0.8b, v0.8b
# CHECK-NEXT: 1 2 0.50 movi d15, #0xff00ff00ff00ff
@@ -2147,7 +2151,7 @@
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8]
-# CHECK-NEXT: - - - - 26.67 52.17 52.17 28.75 7.75 7.75 7.75 708.00 617.00
+# CHECK-NEXT: - - - - 26.67 52.17 52.17 29.75 7.75 7.75 7.75 709.00 618.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions:
@@ -2510,6 +2514,8 @@
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov d6, v0.d[1]
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov h2, v0.h[5]
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov s17, v0.s[2]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v2.s[1], v0.s[2]
+# CHECK-NEXT: - - - - - - - 1.00 - - - 0.50 0.50 mov v0.s[3], w8
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v0.16b, v0.16b
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 mov v0.8b, v0.8b
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 movi d15, #0xff00ff00ff00ff
Index: llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -1210,7 +1210,10 @@
def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
// ASIMD transfer, gen reg to element
-def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSv")>;
+def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
+
+// ASIMD insert, element to element
+def : InstRW<[N2Write_2cyc_1V], (instregex "^INSvi(8|16|32|64)lane$")>;
// ASIMD load instructions
// -----------------------------------------------------------------------------
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D144508.499238.patch
Type: text/x-patch
Size: 3388 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230221/2f28842b/attachment.bin>
More information about the llvm-commits
mailing list