[PATCH] D144508: [AArch64] Fix N2 SchedModel INS instruction latencies

Sjoerd Meijer via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 21 11:06:32 PST 2023


SjoerdMeijer created this revision.
SjoerdMeijer added reviewers: c-rhodes, dmgreen, paulwalker-arm, peterwaller-arm.
Herald added subscribers: gbedwell, hiraditya, kristof.beyls.
Herald added a reviewer: andreadb.
Herald added a project: All.
SjoerdMeijer requested review of this revision.
Herald added a project: LLVM.

The instruction regexp "^INSv", intended for the insert from a general-purpose register to a vector element, was also matching the element-to-element insert instruction. According to the Software Optimization Guide [1], the element-to-element form has a latency of 2, not 5, so the scheduling model was reporting the wrong latency for it.

I haven't done any performance runs with this change, both because I don't have access to N2 hardware and because the fix is hopefully obvious enough. My use case is llvm-mca, which currently produces incorrect results for these instructions because of the wrong latency.

[1] https://developer.arm.com/documentation/PJDOC-466751330-18256/latest/


https://reviews.llvm.org/D144508

Files:
  llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
  llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s


Index: llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
===================================================================
--- llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
+++ llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
@@ -360,6 +360,8 @@
 mov d6, v0.d[1]
 mov h2, v0.h[5]
 mov s17, v0.s[2]
+mov v2.s[1], v0.s[2]
+mov v0.s[3], w8
 mov v0.16b, v0.16b
 mov v0.8b, v0.8b
 movi d15, #0xff00ff00ff00ff
@@ -1429,6 +1431,8 @@
 # CHECK-NEXT:  1      2     0.50                        mov	d6, v0.d[1]
 # CHECK-NEXT:  1      2     0.50                        mov	h2, v0.h[5]
 # CHECK-NEXT:  1      2     0.50                        mov	s17, v0.s[2]
+# CHECK-NEXT:  1      2     0.50                        mov	v2.s[1], v0.s[2]
+# CHECK-NEXT:  2      5     1.00                        mov	v0.s[3], w8
 # CHECK-NEXT:  1      2     0.50                        mov	v0.16b, v0.16b
 # CHECK-NEXT:  1      2     0.50                        mov	v0.8b, v0.8b
 # CHECK-NEXT:  1      2     0.50                        movi	d15, #0xff00ff00ff00ff
@@ -2147,7 +2151,7 @@
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]
-# CHECK-NEXT:  -      -      -      -     26.67  52.17  52.17  28.75  7.75   7.75   7.75   708.00 617.00
+# CHECK-NEXT:  -      -      -      -     26.67  52.17  52.17  29.75  7.75   7.75   7.75   709.00 618.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    Instructions:
@@ -2510,6 +2514,8 @@
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	d6, v0.d[1]
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	h2, v0.h[5]
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	s17, v0.s[2]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v2.s[1], v0.s[2]
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -     0.50   0.50   mov	v0.s[3], w8
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v0.16b, v0.16b
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v0.8b, v0.8b
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   movi	d15, #0xff00ff00ff00ff
Index: llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -1210,7 +1210,10 @@
 def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
 
 // ASIMD transfer, gen reg to element
-def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSv")>;
+def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
+
+// ASIMD insert, element to element
+def : InstRW<[N2Write_2cyc_1V], (instregex "^INSvi(8|16|32|64)lane$")>;
 
 // ASIMD load instructions
 // -----------------------------------------------------------------------------


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D144508.499238.patch
Type: text/x-patch
Size: 3388 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230221/2f28842b/attachment.bin>


More information about the llvm-commits mailing list