[llvm] 70bc7d1 - [AArch64] Corrected Latency Descriptions for NeoverseV2 (#147339)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 14 03:54:01 PDT 2025
Author: YafetBeyene
Date: 2025-07-14T11:53:58+01:00
New Revision: 70bc7d1b0893f1d28aaf31ccffb07d51320885cb
URL: https://github.com/llvm/llvm-project/commit/70bc7d1b0893f1d28aaf31ccffb07d51320885cb
DIFF: https://github.com/llvm/llvm-project/commit/70bc7d1b0893f1d28aaf31ccffb07d51320885cb.diff
LOG: [AArch64] Corrected Latency Descriptions for NeoverseV2 (#147339)
Update the Neoverse V2 scheduling model with the correct latencies for
the ASIMD reduction instructions, and update the relevant llvm-mca tests
to match.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 8d3a4553d4b73..b2c3da03b4b84 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -157,6 +157,7 @@ def V2Write_20c_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 20;
def V2Write_2c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 2; }
def V2Write_2c_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 2; }
def V2Write_3c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 3; }
+def V2Write_3c_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 3; }
def V2Write_4c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 4; }
def V2Write_4c_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Write_6c_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 6; }
@@ -256,8 +257,8 @@ def V2Write_4c_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
let NumMicroOps = 2;
}
-def V2Write_4c_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> {
- let Latency = 4;
+def V2Write_5c_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> {
+ let Latency = 5;
let NumMicroOps = 2;
}
@@ -376,8 +377,8 @@ def V2Write_6c_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> {
let NumMicroOps = 2;
}
-def V2Write_4c_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> {
- let Latency = 4;
+def V2Write_6c_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> {
+ let Latency = 6;
let NumMicroOps = 2;
}
@@ -1468,14 +1469,14 @@ def : SchedAlias<WriteVq, V2Write_2c_1V>;
def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>;
// ASIMD arith, reduce, 4H/4S
-def : InstRW<[V2Write_2c_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
+def : InstRW<[V2Write_3c_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
// ASIMD arith, reduce, 8B/8H
-def : InstRW<[V2Write_4c_1V13_1V],
+def : InstRW<[V2Write_5c_1V13_1V],
(instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
// ASIMD arith, reduce, 16B
-def : InstRW<[V2Write_4c_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
+def : InstRW<[V2Write_6c_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
// ASIMD dot product
// ASIMD dot product using signed and unsigned integers
@@ -1486,15 +1487,15 @@ def : InstRW<[V2Wr_VDOT, V2Rd_VDOT],
def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
// ASIMD max/min, reduce, 4H/4S
-def : InstRW<[V2Write_2c_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
+def : InstRW<[V2Write_3c_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
"^[SU](MAX|MIN)Vv4i32v$")>;
// ASIMD max/min, reduce, 8B/8H
-def : InstRW<[V2Write_4c_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
+def : InstRW<[V2Write_5c_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
"^[SU](MAX|MIN)Vv8i16v$")>;
// ASIMD max/min, reduce, 16B
-def : InstRW<[V2Write_4c_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
+def : InstRW<[V2Write_6c_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
// ASIMD multiply
def : InstRW<[V2Write_4c_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s
index 68a067eb8a360..0fd6bc73c461f 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s
@@ -1257,11 +1257,11 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 addhn2 v0.8h, v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 addp v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 2 0.25 addp v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 1 2 0.50 addv s0, v0.4s
-# CHECK-NEXT: 1 2 0.50 addv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 addv h0, v0.8h
-# CHECK-NEXT: 2 4 0.50 addv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 addv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 addv s0, v0.4s
+# CHECK-NEXT: 1 3 0.50 addv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 addv h0, v0.8h
+# CHECK-NEXT: 2 5 0.50 addv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 addv b0, v0.16b
# CHECK-NEXT: 1 2 0.25 aesd v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 aese v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 aesimc v0.16b, v0.16b
@@ -1779,11 +1779,11 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 saddlp v0.4h, v0.8b
# CHECK-NEXT: 1 2 0.25 saddlp v0.4s, v0.8h
# CHECK-NEXT: 1 2 0.25 saddlp v0.8h, v0.16b
-# CHECK-NEXT: 1 2 0.50 saddlv d0, v0.4s
-# CHECK-NEXT: 1 2 0.50 saddlv s0, v0.4h
-# CHECK-NEXT: 2 4 0.50 saddlv s0, v0.8h
-# CHECK-NEXT: 2 4 0.50 saddlv h0, v0.8b
-# CHECK-NEXT: 2 4 1.00 saddlv h0, v0.16b
+# CHECK-NEXT: 1 3 0.50 saddlv d0, v0.4s
+# CHECK-NEXT: 1 3 0.50 saddlv s0, v0.4h
+# CHECK-NEXT: 2 5 0.50 saddlv s0, v0.8h
+# CHECK-NEXT: 2 5 0.50 saddlv h0, v0.8b
+# CHECK-NEXT: 2 6 1.00 saddlv h0, v0.16b
# CHECK-NEXT: 1 2 0.25 saddw v0.2d, v0.2d, v0.2s
# CHECK-NEXT: 1 2 0.25 saddw v0.4s, v0.4s, v0.4h
# CHECK-NEXT: 1 2 0.25 saddw v0.8h, v0.8h, v0.8b
@@ -1846,22 +1846,22 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 smaxp v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 smaxp v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 2 0.25 smaxp v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 2 4 0.50 smaxv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 smaxv b0, v0.16b
-# CHECK-NEXT: 1 2 0.50 smaxv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 smaxv h0, v0.8h
-# CHECK-NEXT: 1 2 0.50 smaxv s0, v0.4s
+# CHECK-NEXT: 2 5 0.50 smaxv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 smaxv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 smaxv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 smaxv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 smaxv s0, v0.4s
# CHECK-NEXT: 1 2 0.25 smin v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 smin v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 smin v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 sminp v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 sminp v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 sminp v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: 2 4 0.50 sminv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 sminv b0, v0.16b
-# CHECK-NEXT: 1 2 0.50 sminv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 sminv h0, v0.8h
-# CHECK-NEXT: 1 2 0.50 sminv s0, v0.4s
+# CHECK-NEXT: 2 5 0.50 sminv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 sminv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 sminv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 sminv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 sminv s0, v0.4s
# CHECK-NEXT: 1 4 0.50 smlal v0.2d, v0.2s, v0.2s
# CHECK-NEXT: 1 4 0.50 smlal v0.4s, v0.4h, v0.4h
# CHECK-NEXT: 1 4 0.50 smlal v0.8h, v0.8b, v0.8b
@@ -2221,11 +2221,11 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 uaddlp v0.4h, v0.8b
# CHECK-NEXT: 1 2 0.25 uaddlp v0.4s, v0.8h
# CHECK-NEXT: 1 2 0.25 uaddlp v0.8h, v0.16b
-# CHECK-NEXT: 1 2 0.50 uaddlv d0, v0.4s
-# CHECK-NEXT: 1 2 0.50 uaddlv s0, v0.4h
-# CHECK-NEXT: 2 4 0.50 uaddlv s0, v0.8h
-# CHECK-NEXT: 2 4 0.50 uaddlv h0, v0.8b
-# CHECK-NEXT: 2 4 1.00 uaddlv h0, v0.16b
+# CHECK-NEXT: 1 3 0.50 uaddlv d0, v0.4s
+# CHECK-NEXT: 1 3 0.50 uaddlv s0, v0.4h
+# CHECK-NEXT: 2 5 0.50 uaddlv s0, v0.8h
+# CHECK-NEXT: 2 5 0.50 uaddlv h0, v0.8b
+# CHECK-NEXT: 2 6 1.00 uaddlv h0, v0.16b
# CHECK-NEXT: 1 2 0.25 uaddw v0.2d, v0.2d, v0.2s
# CHECK-NEXT: 1 2 0.25 uaddw v0.4s, v0.4s, v0.4h
# CHECK-NEXT: 1 2 0.25 uaddw v0.8h, v0.8h, v0.8b
@@ -2257,22 +2257,22 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 umaxp v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 umaxp v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 umaxp v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: 2 4 0.50 umaxv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 umaxv b0, v0.16b
-# CHECK-NEXT: 1 2 0.50 umaxv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 umaxv h0, v0.8h
-# CHECK-NEXT: 1 2 0.50 umaxv s0, v0.4s
+# CHECK-NEXT: 2 5 0.50 umaxv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 umaxv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 umaxv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 umaxv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 umaxv s0, v0.4s
# CHECK-NEXT: 1 2 0.25 umin v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 umin v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 2 0.25 umin v0.8b, v0.8b, v0.8b
# CHECK-NEXT: 1 2 0.25 uminp v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 uminp v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 2 0.25 uminp v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 2 4 0.50 uminv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 uminv b0, v0.16b
-# CHECK-NEXT: 1 2 0.50 uminv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 uminv h0, v0.8h
-# CHECK-NEXT: 1 2 0.50 uminv s0, v0.4s
+# CHECK-NEXT: 2 5 0.50 uminv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 uminv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 uminv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 uminv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 uminv s0, v0.4s
# CHECK-NEXT: 1 4 0.50 umlal v0.2d, v0.2s, v0.2s
# CHECK-NEXT: 1 4 0.50 umlal v0.4s, v0.4h, v0.4h
# CHECK-NEXT: 1 4 0.50 umlal v0.8h, v0.8b, v0.8b
More information about the llvm-commits
mailing list