[llvm] [AArch64] Added scheduling information for flag manipulation instructions in Neoverse-V2 (PR #139765)

Philipp Rados via llvm-commits llvm-commits at lists.llvm.org
Tue May 13 09:55:14 PDT 2025


https://github.com/PhilippRados created https://github.com/llvm/llvm-project/pull/139765

Fixes #122124

I changed the pipeline used by these instructions to pipeline F (plus the flag resource, `V2UnitFlg`) to match the SWOG specification. To get a throughput of 1 even though these resources have multiple units, I increased the `ReleaseAtCycles` values of both the F and Flg ProcResources in a new, separate def.
I also added these instructions to the Neoverse-V2 lit-tests.

One thing I noticed is that all other defs that set a specific `ReleaseAtCycles` value set it lower than the latency. However, in this case the latency should still be 1 while the throughput is also 1, which can only be achieved if `ReleaseAtCycles > Latency`. I'm not sure if this is an issue.
Another thing is that the reported resource pressure got larger, and I'm not sure whether this is intended or whether the increase in `ReleaseAtCycles` is too severe.

Since this same format is used in the "Convert floating-point condition flags" instructions (AXFLAG, XAFLAG) it would be easy to add the scheduling info for those instructions too, if needed.

>From 3e61b5638ee23fad26046d0faef3581bc2ce34e0 Mon Sep 17 00:00:00 2001
From: PhilippR <phil.rados at gmail.com>
Date: Tue, 13 May 2025 18:38:35 +0200
Subject: [PATCH] [AArch64] Added scheduling information for flag manipulation
 instructions in Neoverse-V2

---
 .../Target/AArch64/AArch64SchedNeoverseV2.td  |  8 +++++++-
 .../AArch64/Neoverse/V2-basic-instructions.s  | 19 ++++++++++++++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 39f7077ae4514..f854c4ad29984 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -99,6 +99,8 @@ def V2Write_0c : SchedWriteRes<[]> { let Latency = 0; }
 def V2Write_1c_1B    : SchedWriteRes<[V2UnitB]>   { let Latency = 1; }
 def V2Write_1c_1F    : SchedWriteRes<[V2UnitF]>   { let Latency = 1; }
 def V2Write_1c_1F_1Flg : SchedWriteRes<[V2UnitF, V2UnitFlg]>   { let Latency = 1; }
+def V2Write_1c_1F_1Flg_1T : SchedWriteRes<[V2UnitF, V2UnitFlg]>   { let Latency = 1;
+                                                         let ReleaseAtCycles = [4, 3]; }
 def V2Write_1c_1I    : SchedWriteRes<[V2UnitI]>   { let Latency = 1; }
 def V2Write_1c_1M    : SchedWriteRes<[V2UnitM]>   { let Latency = 1; }
 def V2Write_1c_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 1; }
@@ -1136,9 +1138,13 @@ def : InstRW<[V2Write_2c_1M], (instrs ADDG, SUBG)>;
 def : InstRW<[V2Write_1c_1F_1Flg], (instregex "^CCM[NP][WX][ir]")>;
 
 // Convert floating-point condition flags
-// Flag manipulation instructions
 def : WriteRes<WriteSys, []> { let Latency = 1; }
 
+// Flag manipulation instructions
+// NOTE: Releases its resources later in order to model throughput=1
+def : InstRW<[V2Write_1c_1F_1Flg_1T], (instregex "^(RMIF|CFINV)",
+                                                 "^SETF(8|16)")>;
+
 // Insert Random Tags
 def : InstRW<[V2Write_2c_1M], (instrs IRG, IRGstack)>;
 
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
index 581dad6b68dcf..f06e3147de491 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
@@ -442,6 +442,15 @@ cneg     x9, xzr, lt
 csneg    x4, x8, x8, al
 csinv    w9, w8, w8, nv
 
+#------------------------------------------------------------------------------
+# Flag manipulation instructions
+#------------------------------------------------------------------------------
+
+rmif
+cfinv
+setf8 w1
+setf16 w1
+
 #------------------------------------------------------------------------------
 # Data-processing (1 source)
 #------------------------------------------------------------------------------
@@ -1787,6 +1796,10 @@ drps
 # CHECK-NEXT:  1      1     0.17                        cneg	x9, xzr, lt
 # CHECK-NEXT:  1      1     0.17                        csneg	x4, x8, x8, al
 # CHECK-NEXT:  1      1     0.17                        csinv	w9, w8, w8, nv
+# CHECK-NEXT:  1      1     1.00                  U     rmif	#0, #0, #0
+# CHECK-NEXT:  1      1     1.00                  U     cfinv
+# CHECK-NEXT:  1      1     1.00                  U     setf8	w1
+# CHECK-NEXT:  1      1     1.00                  U     setf16	w1
 # CHECK-NEXT:  1      1     0.17                        rbit	w0, w7
 # CHECK-NEXT:  1      1     0.17                        rbit	x18, x3
 # CHECK-NEXT:  1      1     0.17                        rev16	w17, w1
@@ -2592,7 +2605,7 @@ drps
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2.0]  [2.1]  [2.2]  [3]    [4.0]  [4.1]  [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   [14]
-# CHECK-NEXT: 11.00  11.00  33.00  33.00  54.00  54.00  54.00  99.00  165.00 165.00 329.92 184.92 112.92 112.92 86.17  86.17  190.00 146.00 30.00  10.00
+# CHECK-NEXT: 11.00  11.00  33.00  33.00  58.00  58.00  58.00  99.00  165.00 165.00 333.92 188.92 116.92 116.92 86.17  86.17  190.00 146.00 30.00  10.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2.0]  [2.1]  [2.2]  [3]    [4.0]  [4.1]  [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   [14]   Instructions:
@@ -2992,6 +3005,10 @@ drps
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     cneg	x9, xzr, lt
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     csneg	x4, x8, x8, al
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     csinv	w9, w8, w8, nv
+# CHECK-NEXT:  -      -      -      -     1.00   1.00   1.00    -      -      -     1.00   1.00   1.00   1.00    -      -      -      -      -      -     rmif	#0, #0, #0
+# CHECK-NEXT:  -      -      -      -     1.00   1.00   1.00    -      -      -     1.00   1.00   1.00   1.00    -      -      -      -      -      -     cfinv
+# CHECK-NEXT:  -      -      -      -     1.00   1.00   1.00    -      -      -     1.00   1.00   1.00   1.00    -      -      -      -      -      -     setf8	w1
+# CHECK-NEXT:  -      -      -      -     1.00   1.00   1.00    -      -      -     1.00   1.00   1.00   1.00    -      -      -      -      -      -     setf16	w1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     rbit	w0, w7
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     rbit	x18, x3
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     rev16	w17, w1



More information about the llvm-commits mailing list