[llvm] 314e431 - [AArch64] Fix N2 SchedModel element-to-element INS latencies

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 22 03:21:08 PST 2023


Author: Sjoerd Meijer
Date: 2023-02-22T10:55:55Z
New Revision: 314e431406de6e1518836f304a0a0e469c670a71

URL: https://github.com/llvm/llvm-project/commit/314e431406de6e1518836f304a0a0e469c670a71
DIFF: https://github.com/llvm/llvm-project/commit/314e431406de6e1518836f304a0a0e469c670a71.diff

LOG: [AArch64] Fix N2 SchedModel element-to-element INS latencies

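The instruction regex "^INSv", intended for the gen-reg-to-element insert, also
matched the element-to-element INS instructions, which have a latency of 2
rather than 5, so their latency was modelled incorrectly. Restrict the regex
to the gen-reg forms (INSvi{8,16,32,64}gpr) so that it no longer matches the
element-to-element variants.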

Differential Revision: https://reviews.llvm.org/D144508

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
    llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index 21a0e927d7567..d9d5c2be03092 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -1210,7 +1210,7 @@ def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>;
 def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
 
 // ASIMD transfer, gen reg to element
-def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSv")>;
+def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
 
 // ASIMD load instructions
 // -----------------------------------------------------------------------------

diff  --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
index 7031a60f99587..13c1403f356ee 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-neon-instructions.s
@@ -360,6 +360,14 @@ mov b0, v0.b[15]
 mov d6, v0.d[1]
 mov h2, v0.h[5]
 mov s17, v0.s[2]
+mov v2.b[0], v0.b[0]
+mov v2.h[1], v0.h[1]
+mov v2.s[2], v0.s[2]
+mov v2.d[1], v0.d[1]
+mov v0.b[0], w8
+mov v0.h[1], w8
+mov v0.s[2], w8
+mov v0.d[1], x8
 mov v0.16b, v0.16b
 mov v0.8b, v0.8b
 movi d15, #0xff00ff00ff00ff
@@ -1429,6 +1437,14 @@ zip2	v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  1      2     0.50                        mov	d6, v0.d[1]
 # CHECK-NEXT:  1      2     0.50                        mov	h2, v0.h[5]
 # CHECK-NEXT:  1      2     0.50                        mov	s17, v0.s[2]
+# CHECK-NEXT:  1      2     0.50                        mov	v2.b[0], v0.b[0]
+# CHECK-NEXT:  1      2     0.50                        mov	v2.h[1], v0.h[1]
+# CHECK-NEXT:  1      2     0.50                        mov	v2.s[2], v0.s[2]
+# CHECK-NEXT:  1      2     0.50                        mov	v2.d[1], v0.d[1]
+# CHECK-NEXT:  2      5     1.00                        mov	v0.b[0], w8
+# CHECK-NEXT:  2      5     1.00                        mov	v0.h[1], w8
+# CHECK-NEXT:  2      5     1.00                        mov	v0.s[2], w8
+# CHECK-NEXT:  2      5     1.00                        mov	v0.d[1], x8
 # CHECK-NEXT:  1      2     0.50                        mov	v0.16b, v0.16b
 # CHECK-NEXT:  1      2     0.50                        mov	v0.8b, v0.8b
 # CHECK-NEXT:  1      2     0.50                        movi	d15, #0xff00ff00ff00ff
@@ -2147,7 +2163,7 @@ zip2	v0.8h, v0.8h, v0.8h
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]
-# CHECK-NEXT:  -      -      -      -     26.67  52.17  52.17  28.75  7.75   7.75   7.75   708.00 617.00
+# CHECK-NEXT:  -      -      -      -     26.67  52.17  52.17  32.75  7.75   7.75   7.75   712.00 621.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    Instructions:
@@ -2510,6 +2526,14 @@ zip2	v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	d6, v0.d[1]
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	h2, v0.h[5]
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	s17, v0.s[2]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v2.b[0], v0.b[0]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v2.h[1], v0.h[1]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v2.s[2], v0.s[2]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v2.d[1], v0.d[1]
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -     0.50   0.50   mov	v0.b[0], w8
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -     0.50   0.50   mov	v0.h[1], w8
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -     0.50   0.50   mov	v0.s[2], w8
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -     0.50   0.50   mov	v0.d[1], x8
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v0.16b, v0.16b
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v0.8b, v0.8b
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   movi	d15, #0xff00ff00ff00ff


        


More information about the llvm-commits mailing list