[llvm] [AArch64] Corrected Latency Descriptions for NeoverseV2/N2 Scheduler (PR #147339)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 8 03:31:14 PDT 2025
https://github.com/yafet-a updated https://github.com/llvm/llvm-project/pull/147339
From 1d42049cec4480eff08b85cbf2b2fd9a42d8bbbc Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at ybeyene-mlt.client.nvidia.com>
Date: Mon, 7 Jul 2025 20:51:47 +0100
Subject: [PATCH] Neoverse-V2 Scheduler Latency Update
---
.../Target/AArch64/AArch64SchedNeoverseV2.td | 21 ++--
.../llvm-mca/AArch64/Neoverse/N2-writeback.s | 104 +++++++++---------
.../AArch64/Neoverse/V2-neon-instructions.s | 70 ++++++------
3 files changed, 98 insertions(+), 97 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 8d3a4553d4b73..b2c3da03b4b84 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -157,6 +157,7 @@ def V2Write_20c_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 20;
def V2Write_2c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 2; }
def V2Write_2c_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 2; }
def V2Write_3c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 3; }
+def V2Write_3c_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 3; }
def V2Write_4c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 4; }
def V2Write_4c_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Write_6c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 6; }
@@ -256,8 +257,8 @@ def V2Write_4c_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
let NumMicroOps = 2;
}
-def V2Write_4c_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> {
- let Latency = 4;
+def V2Write_5c_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> {
+ let Latency = 5;
let NumMicroOps = 2;
}
@@ -376,8 +377,8 @@ def V2Write_6c_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> {
let NumMicroOps = 2;
}
-def V2Write_4c_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> {
- let Latency = 4;
+def V2Write_6c_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> {
+ let Latency = 6;
let NumMicroOps = 2;
}
@@ -1468,14 +1469,14 @@ def : SchedAlias<WriteVq, V2Write_2c_1V>;
def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>;
// ASIMD arith, reduce, 4H/4S
-def : InstRW<[V2Write_2c_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
+def : InstRW<[V2Write_3c_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
// ASIMD arith, reduce, 8B/8H
-def : InstRW<[V2Write_4c_1V13_1V],
+def : InstRW<[V2Write_5c_1V13_1V],
(instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
// ASIMD arith, reduce, 16B
-def : InstRW<[V2Write_4c_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
+def : InstRW<[V2Write_6c_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
// ASIMD dot product
// ASIMD dot product using signed and unsigned integers
@@ -1486,15 +1487,15 @@ def : InstRW<[V2Wr_VDOT, V2Rd_VDOT],
def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
// ASIMD max/min, reduce, 4H/4S
-def : InstRW<[V2Write_2c_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
+def : InstRW<[V2Write_3c_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
"^[SU](MAX|MIN)Vv4i32v$")>;
// ASIMD max/min, reduce, 8B/8H
-def : InstRW<[V2Write_4c_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
+def : InstRW<[V2Write_5c_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
"^[SU](MAX|MIN)Vv8i16v$")>;
// ASIMD max/min, reduce, 16B
-def : InstRW<[V2Write_4c_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
+def : InstRW<[V2Write_6c_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
// ASIMD multiply
def : InstRW<[V2Write_4c_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
index dee46a304582b..b649a14e4cee0 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
@@ -1605,11 +1605,11 @@ add x0, x27, 1
# CHECK-NEXT: Block RThroughput: 5.7
# CHECK: Timeline view:
-# CHECK-NEXT: 0123
+# CHECK-NEXT: 01234
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
-# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1
# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
@@ -1651,7 +1651,7 @@ add x0, x27, 1
# CHECK-NEXT: Block RThroughput: 6.7
# CHECK: Timeline view:
-# CHECK-NEXT: 01234
+# CHECK-NEXT: 01234567
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeER. . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
@@ -1697,7 +1697,7 @@ add x0, x27, 1
# CHECK-NEXT: Block RThroughput: 6.7
# CHECK: Timeline view:
-# CHECK-NEXT: 01234
+# CHECK-NEXT: 01234567
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeER. . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
@@ -1744,7 +1744,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123
# CHECK: [0,0] DeeeeeeeER. . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
# CHECK-NEXT: [0,1] .DeE-----R. . . . add x0, x27, #1
@@ -2114,8 +2114,8 @@ add x0, x27, 1
# CHECK-NEXT: 012345
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
+# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1
# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28
@@ -2480,7 +2480,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012345678
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeeER . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
@@ -2526,7 +2526,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeeeeER . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: [0,1] .DeE------R . . . . add x0, x27, #1
@@ -2664,7 +2664,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012345678
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeeER . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
@@ -2710,7 +2710,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012345678
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeeER . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
@@ -2756,7 +2756,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012345678
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeeER . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
@@ -2802,7 +2802,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeeER . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
@@ -2848,7 +2848,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012345678
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
@@ -2894,7 +2894,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
@@ -3078,7 +3078,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012345678
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
@@ -3124,7 +3124,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012345678
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1
@@ -3170,7 +3170,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012345
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: [0,1] .DeE------R . add x0, x27, #1
@@ -3216,7 +3216,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . . ldp q1, q2, [x27], #992
# CHECK-NEXT: [0,1] .DeE----R . . add x0, x27, #1
@@ -3308,7 +3308,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27], #254
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
@@ -3354,7 +3354,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27, #254]!
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
@@ -3400,7 +3400,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER . ldr w1, [x27], #254
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
@@ -3446,7 +3446,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]!
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
@@ -3492,7 +3492,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]!
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
@@ -3853,7 +3853,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1
@@ -3899,7 +3899,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1
@@ -3945,7 +3945,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1
@@ -3991,7 +3991,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1
@@ -4037,7 +4037,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1
@@ -4083,7 +4083,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1
@@ -4129,7 +4129,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1
@@ -4175,7 +4175,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER . st1 { v1.b }[8], [x27], x28
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
@@ -4221,7 +4221,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER . st1 { v1.s }[0], [x27], #4
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
@@ -4267,7 +4267,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER . st2 { v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
@@ -4313,7 +4313,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 01
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER .. st2 { v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: [0,1] .DeE--R .. add x0, x27, #1
@@ -4359,7 +4359,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 01
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER .. st2 { v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1
@@ -4405,7 +4405,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER . st2 { v1.b, v2.b }[0], [x27], x28
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
@@ -4451,7 +4451,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER . st2 { v1.h, v2.h }[4], [x27], x28
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
@@ -4497,7 +4497,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeER . . . st2g x26, [x27], #4064
# CHECK-NEXT: [0,1] D=eER. . . add x0, x27, #1
@@ -4543,7 +4543,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . .. st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1
@@ -4589,7 +4589,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeER . .. st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: [0,1] .DeE---R . .. add x0, x27, #1
@@ -4635,7 +4635,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1
@@ -4681,7 +4681,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . .. st3 { v1.h, v2.h, v3.h }[0], [x27], #6
# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1
@@ -4727,7 +4727,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . .. st3 { v1.s, v2.s, v3.s }[0], [x27], x28
# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1
@@ -4773,7 +4773,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
# CHECK-NEXT: [0,1] .DeE----R . . . add x0, x27, #1
@@ -4819,7 +4819,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 01234567
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeER . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: [0,1] .DeE---R . . . add x0, x27, #1
@@ -4865,7 +4865,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012345678
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeER. . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: [0,1] . DeE----R. . . add x0, x27, #1
@@ -4911,7 +4911,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . .. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1
@@ -4957,7 +4957,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 012
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
# CHECK-NEXT: [0,1] .DeE----R . . add x0, x27, #1
@@ -5048,7 +5048,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . stp q1, q2, [x27], #992
# CHECK-NEXT: [0,1] D==eER . add x0, x27, #1
@@ -5319,7 +5319,7 @@ add x0, x27, 1
# CHECK: Timeline view:
# CHECK-NEXT: 0
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER . ldr x1, [x27], #254
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s
index 68a067eb8a360..0fd6bc73c461f 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s
@@ -1257,11 +1257,11 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 addhn2 v0.8h, v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 addp v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 2 0.25 addp v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 1 2 0.50 addv s0, v0.4s
-# CHECK-NEXT: 1 2 0.50 addv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 addv h0, v0.8h
-# CHECK-NEXT: 2 4 0.50 addv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 addv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 addv s0, v0.4s
+# CHECK-NEXT: 1 3 0.50 addv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 addv h0, v0.8h
+# CHECK-NEXT: 2 5 0.50 addv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 addv b0, v0.16b
# CHECK-NEXT: 1 2 0.25 aesd v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 aese v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 aesimc v0.16b, v0.16b
@@ -1779,11 +1779,11 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 saddlp v0.4h, v0.8b
# CHECK-NEXT: 1 2 0.25 saddlp v0.4s, v0.8h
# CHECK-NEXT: 1 2 0.25 saddlp v0.8h, v0.16b
-# CHECK-NEXT: 1 2 0.50 saddlv d0, v0.4s
-# CHECK-NEXT: 1 2 0.50 saddlv s0, v0.4h
-# CHECK-NEXT: 2 4 0.50 saddlv s0, v0.8h
-# CHECK-NEXT: 2 4 0.50 saddlv h0, v0.8b
-# CHECK-NEXT: 2 4 1.00 saddlv h0, v0.16b
+# CHECK-NEXT: 1 3 0.50 saddlv d0, v0.4s
+# CHECK-NEXT: 1 3 0.50 saddlv s0, v0.4h
+# CHECK-NEXT: 2 5 0.50 saddlv s0, v0.8h
+# CHECK-NEXT: 2 5 0.50 saddlv h0, v0.8b
+# CHECK-NEXT: 2 6 1.00 saddlv h0, v0.16b
# CHECK-NEXT: 1 2 0.25 saddw v0.2d, v0.2d, v0.2s
# CHECK-NEXT: 1 2 0.25 saddw v0.4s, v0.4s, v0.4h
# CHECK-NEXT: 1 2 0.25 saddw v0.8h, v0.8h, v0.8b
@@ -1846,22 +1846,22 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 smaxp v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 smaxp v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 2 0.25 smaxp v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 2 4 0.50 smaxv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 smaxv b0, v0.16b
-# CHECK-NEXT: 1 2 0.50 smaxv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 smaxv h0, v0.8h
-# CHECK-NEXT: 1 2 0.50 smaxv s0, v0.4s
+# CHECK-NEXT: 2 5 0.50 smaxv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 smaxv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 smaxv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 smaxv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 smaxv s0, v0.4s
# CHECK-NEXT: 1 2 0.25 smin v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 smin v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 smin v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 sminp v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 sminp v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 sminp v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: 2 4 0.50 sminv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 sminv b0, v0.16b
-# CHECK-NEXT: 1 2 0.50 sminv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 sminv h0, v0.8h
-# CHECK-NEXT: 1 2 0.50 sminv s0, v0.4s
+# CHECK-NEXT: 2 5 0.50 sminv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 sminv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 sminv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 sminv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 sminv s0, v0.4s
# CHECK-NEXT: 1 4 0.50 smlal v0.2d, v0.2s, v0.2s
# CHECK-NEXT: 1 4 0.50 smlal v0.4s, v0.4h, v0.4h
# CHECK-NEXT: 1 4 0.50 smlal v0.8h, v0.8b, v0.8b
@@ -2221,11 +2221,11 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 uaddlp v0.4h, v0.8b
# CHECK-NEXT: 1 2 0.25 uaddlp v0.4s, v0.8h
# CHECK-NEXT: 1 2 0.25 uaddlp v0.8h, v0.16b
-# CHECK-NEXT: 1 2 0.50 uaddlv d0, v0.4s
-# CHECK-NEXT: 1 2 0.50 uaddlv s0, v0.4h
-# CHECK-NEXT: 2 4 0.50 uaddlv s0, v0.8h
-# CHECK-NEXT: 2 4 0.50 uaddlv h0, v0.8b
-# CHECK-NEXT: 2 4 1.00 uaddlv h0, v0.16b
+# CHECK-NEXT: 1 3 0.50 uaddlv d0, v0.4s
+# CHECK-NEXT: 1 3 0.50 uaddlv s0, v0.4h
+# CHECK-NEXT: 2 5 0.50 uaddlv s0, v0.8h
+# CHECK-NEXT: 2 5 0.50 uaddlv h0, v0.8b
+# CHECK-NEXT: 2 6 1.00 uaddlv h0, v0.16b
# CHECK-NEXT: 1 2 0.25 uaddw v0.2d, v0.2d, v0.2s
# CHECK-NEXT: 1 2 0.25 uaddw v0.4s, v0.4s, v0.4h
# CHECK-NEXT: 1 2 0.25 uaddw v0.8h, v0.8h, v0.8b
@@ -2257,22 +2257,22 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 umaxp v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 umaxp v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 umaxp v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: 2 4 0.50 umaxv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 umaxv b0, v0.16b
-# CHECK-NEXT: 1 2 0.50 umaxv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 umaxv h0, v0.8h
-# CHECK-NEXT: 1 2 0.50 umaxv s0, v0.4s
+# CHECK-NEXT: 2 5 0.50 umaxv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 umaxv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 umaxv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 umaxv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 umaxv s0, v0.4s
# CHECK-NEXT: 1 2 0.25 umin v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 umin v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 2 0.25 umin v0.8b, v0.8b, v0.8b
# CHECK-NEXT: 1 2 0.25 uminp v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 uminp v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 2 0.25 uminp v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 2 4 0.50 uminv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 uminv b0, v0.16b
-# CHECK-NEXT: 1 2 0.50 uminv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 uminv h0, v0.8h
-# CHECK-NEXT: 1 2 0.50 uminv s0, v0.4s
+# CHECK-NEXT: 2 5 0.50 uminv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 uminv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 uminv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 uminv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 uminv s0, v0.4s
# CHECK-NEXT: 1 4 0.50 umlal v0.2d, v0.2s, v0.2s
# CHECK-NEXT: 1 4 0.50 umlal v0.4s, v0.4h, v0.4h
# CHECK-NEXT: 1 4 0.50 umlal v0.8h, v0.8b, v0.8b
More information about the llvm-commits mailing list