[llvm] [AArch64] Initial sched model for Neoverse N3 (PR #106371)

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 19 07:00:35 PDT 2024


================
@@ -0,0 +1,2333 @@
+//=- AArch64SchedNeoverseN3.td - NeoverseN3 Scheduling Defs --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the Arm Neoverse N3 processors.
+//
+//===----------------------------------------------------------------------===//
+
+def NeoverseN3Model : SchedMachineModel { // Selected by 'let SchedModel = NeoverseN3Model in {' in this file.
+    let IssueWidth            =  10; // Micro-ops dispatched at a time.
+    let MicroOpBufferSize     = 160; // Entries in micro-op re-order buffer. NOTE: Copied from N2.
+    let LoadLatency           =   4; // Optimistic load latency.
+    let MispredictPenalty     =  10; // Extra cycles for mispredicted branch. NOTE: Copied from N2.
+    let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
+    let CompleteModel         =   1; // NOTE(review): presumably asserts full instruction coverage - confirm.
+
+    list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
+        [HasSVE2p1, HasSVEB16B16, HasPAuthLR, HasCPA, HasCSSC]); // Appended to the SMEUnsupported.F list.
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and the number available on Neoverse N3.
+// Instructions are first fetched and then decoded into internal Macro-OPerations
+// (MOPs). From there, the MOPs proceed through register renaming and dispatch stages.
+// A MOP can be split into two Micro-OPerations (µOPs) further down the pipeline
+// after the decode stage. Once dispatched, µOPs wait for their operands and issue
+// out-of-order to one of thirteen issue pipelines. Each issue pipeline can accept
+// one µOP per cycle.
+
+let SchedModel = NeoverseN3Model in {
+
+// Define the (13) issue ports; 13 is the sum of the ProcResource<N> counts below.
+def N3UnitB   : ProcResource<2>;  // Branch 0/1
+def N3UnitS   : ProcResource<2>;  // Integer Single-Cycle 0/1
+def N3UnitM0  : ProcResource<1>;  // Integer Single/Multi-Cycle 0
+def N3UnitM1  : ProcResource<1>;  // Integer Single/Multi-Cycle 1
+def N3UnitL01 : ProcResource<2>;  // Load/Store 0/1
+def N3UnitL2  : ProcResource<1>;  // Load 2
+def N3UnitD   : ProcResource<2>;  // Integer Store data 0/1
+def N3UnitV0  : ProcResource<1>;  // FP/ASIMD 0
+def N3UnitV1  : ProcResource<1>;  // FP/ASIMD 1
+
+def N3UnitV : ProcResGroup<[N3UnitV0, N3UnitV1]>;  // Either FP/ASIMD unit (V0 or V1)
+def N3UnitM : ProcResGroup<[N3UnitM0, N3UnitM1]>;  // Either integer single/multi-cycle unit (M0 or M1)
+def N3UnitL : ProcResGroup<[N3UnitL01, N3UnitL2]>;  // Load/Store 0/1 or Load 2
+def N3UnitI : ProcResGroup<[N3UnitS, N3UnitM0, N3UnitM1]>;  // Any integer unit (S, M0 or M1)
+
+//===----------------------------------------------------------------------===//
+
+def : ReadAdvance<ReadI,       0>; // Every read type in this list is given a ReadAdvance of 0.
+def : ReadAdvance<ReadISReg,   0>;
+def : ReadAdvance<ReadIEReg,   0>;
+def : ReadAdvance<ReadIM,      0>;
+def : ReadAdvance<ReadIMA,     0>;
+def : ReadAdvance<ReadID,      0>;
+def : ReadAdvance<ReadExtrHi,  0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST,      0>;
+def : ReadAdvance<ReadVLD,     0>;
+
+// NOTE: Copied from N2. All three records use an empty resource list.
+def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; } // Flagged Unsupported in this model.
+def : WriteRes<WriteBarrier, []> { let Latency = 1; } // Latency 1, no resources.
+def : WriteRes<WriteHint,    []> { let Latency = 1; } // Latency 1, no resources.
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Neoverse N3.
+
+//===----------------------------------------------------------------------===//
+// Define generic 0 micro-op types (empty resource list, NumMicroOps = 0)
+
+def N3Write_0c : SchedWriteRes<[]> { // Zero latency, zero micro-ops.
+    let Latency = 0;
+    let NumMicroOps = 0;
+}
+
+def N3Write_4c : SchedWriteRes<[]> { // 4-cycle latency, still zero micro-ops and no resources.
+    let Latency = 4;
+    let NumMicroOps = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 1 micro-op types. Names follow N3Write_<latency>c_<count><unit>; NumMicroOps is left unset here.
+
+def N3Write_1c_1B   : SchedWriteRes<[N3UnitB]>   { let Latency = 1; }
+def N3Write_1c_1I   : SchedWriteRes<[N3UnitI]>   { let Latency = 1; }
+def N3Write_2c_1M   : SchedWriteRes<[N3UnitM]>   { let Latency = 2; }
+def N3Write_2c_1M0  : SchedWriteRes<[N3UnitM0]>  { let Latency = 2; }
+def N3Write_3c_1M   : SchedWriteRes<[N3UnitM]>   { let Latency = 3; }
+def N3Write_1c_1M   : SchedWriteRes<[N3UnitM]>   { let Latency = 1; }
+def N3Write_4c_1M   : SchedWriteRes<[N3UnitM]>   { let Latency = 4; }
+def N3Write_1c_1S   : SchedWriteRes<[N3UnitS]>   { let Latency = 1; }
+def N3Write_4c_1L   : SchedWriteRes<[N3UnitL]>   { let Latency = 4; }
+def N3Write_2c_1V   : SchedWriteRes<[N3UnitV]>   { let Latency = 2; }
+def N3Write_5c_1V0  : SchedWriteRes<[N3UnitV0]>  { let Latency = 5; }
+def N3Write_7c_1V0  : SchedWriteRes<[N3UnitV0]>  { let Latency = 7; }
+def N3Write_12c_1V0 : SchedWriteRes<[N3UnitV0]>  { let Latency = 12; }
+def N3Write_3c_1V   : SchedWriteRes<[N3UnitV]>   { let Latency = 3; }
+def N3Write_4c_1V   : SchedWriteRes<[N3UnitV]>   { let Latency = 4; }
+def N3Write_3c_1V0  : SchedWriteRes<[N3UnitV0]>  { let Latency = 3; }
+def N3Write_3c_1M0  : SchedWriteRes<[N3UnitM0]>  { let Latency = 3; }
+def N3Write_6c_1L   : SchedWriteRes<[N3UnitL]>   { let Latency = 6; }
+def N3Write_4c_1V1  : SchedWriteRes<[N3UnitV1]>  { let Latency = 4; }
+def N3Write_3c_1V1  : SchedWriteRes<[N3UnitV1]>  { let Latency = 3; }
+def N3Write_4c_1V0  : SchedWriteRes<[N3UnitV0]>  { let Latency = 4; }
+def N3Write_2c_1V0  : SchedWriteRes<[N3UnitV0]>  { let Latency = 2; }
+def N3Write_2c_1V1  : SchedWriteRes<[N3UnitV1]>  { let Latency = 2; }
+def N3Write_5c_1V   : SchedWriteRes<[N3UnitV]>   { let Latency = 5; }
+def N3Write_1c_1L01 : SchedWriteRes<[N3UnitL01]> { let Latency = 1; }
+
+def N3Write_12c_1M0_12 : SchedWriteRes<[N3UnitM0]> { // Trailing _12 in the name mirrors ReleaseAtCycles.
+    let Latency = 12;
+    let ReleaseAtCycles = [12]; // Same value as the latency.
+}
+
+def N3Write_20c_1M0_20 : SchedWriteRes<[N3UnitM0]> { // Trailing _20 in the name mirrors ReleaseAtCycles.
+    let Latency = 20;
+    let ReleaseAtCycles = [20]; // Same value as the latency.
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 2 micro-op types. Names follow N3Write_<latency>c_<count><unit>...; each record lists two resources.
+
+def N3Write_1c_1B_1S : SchedWriteRes<[N3UnitB, N3UnitS]> {
+    let Latency = 1;
+    let NumMicroOps = 2;
+}
+
+def N3Write_2c_1M_1B : SchedWriteRes<[N3UnitM, N3UnitB]> {
+    let Latency = 2;
+    let NumMicroOps = 2;
+}
+
+def N3Write_5c_1L_1S : SchedWriteRes<[N3UnitL, N3UnitS]> {
+    let Latency = 5;
+    let NumMicroOps = 2;
+}
+
+def N3Write_4c_2L : SchedWriteRes<[N3UnitL, N3UnitL]> { // N3UnitL appears twice, matching "2L" in the name.
+    let Latency = 4;
+    let NumMicroOps = 2;
+}
+
+def N3Write_1c_1L01_1D : SchedWriteRes<[N3UnitL01, N3UnitD]> {
+    let Latency = 1;
+    let NumMicroOps = 2;
+}
+
+def N3Write_5c_1L_1I : SchedWriteRes<[N3UnitL, N3UnitI]> {
+    let Latency = 5;
+    let NumMicroOps = 2;
+}
+
+def N3Write_6c_2L : SchedWriteRes<[N3UnitL, N3UnitL]> {
+    let Latency = 6;
+    let NumMicroOps = 2;
+}
+
+def N3Write_2c_1L01_1V : SchedWriteRes<[N3UnitL01, N3UnitV]> {
+    let Latency = 2;
+    let NumMicroOps = 2;
+}
+
+def N3Write_6c_2V1 : SchedWriteRes<[N3UnitV1, N3UnitV1]> {
+    let Latency = 6;
+    let NumMicroOps = 2;
+}
+
+def N3Write_4c_2V0 : SchedWriteRes<[N3UnitV0, N3UnitV0]> {
+    let Latency = 4;
+    let NumMicroOps = 2;
+}
+
+def N3Write_8c_2V0 : SchedWriteRes<[N3UnitV0, N3UnitV0]> {
+    let Latency = 8;
+    let NumMicroOps = 2;
+}
+
+def N3Write_13c_2V0 : SchedWriteRes<[N3UnitV0, N3UnitV0]> {
+    let Latency = 13;
+    let NumMicroOps = 2;
+}
+
+def N3Write_4c_2V : SchedWriteRes<[N3UnitV, N3UnitV]> {
+    let Latency = 4;
+    let NumMicroOps = 2;
+}
+
+def N3Write_2c_2V : SchedWriteRes<[N3UnitV, N3UnitV]> {
+    let Latency = 2;
+    let NumMicroOps = 2;
+}
+
+def N3Write_8c_1L_1V : SchedWriteRes<[N3UnitL, N3UnitV]> {
+    let Latency = 8;
+    let NumMicroOps = 2;
+}
+
+def N3Write_2c_1V_1L01 : SchedWriteRes<[N3UnitV, N3UnitL01]> { // Same units as N3Write_2c_1L01_1V, listed in the opposite order.
+    let Latency = 2;
+    let NumMicroOps = 2;
+}
+
+def N3Write_5c_2V0 : SchedWriteRes<[N3UnitV0, N3UnitV0]> {
+    let Latency = 5;
+    let NumMicroOps = 2;
+}
+
+def N3Write_7c_1L_1M : SchedWriteRes<[N3UnitL, N3UnitM]> {
+    let Latency = 7;
+    let NumMicroOps = 2;
+}
+
+def N3Write_8c_1V_1L : SchedWriteRes<[N3UnitV, N3UnitL]> { // Same units as N3Write_8c_1L_1V, listed in the opposite order.
+    let Latency = 8;
+    let NumMicroOps = 2;
+}
+
+def N3Write_5c_1M0_1V : SchedWriteRes<[N3UnitM0, N3UnitV]> {
+    let Latency = 5;
+    let NumMicroOps = 2;
+}
+
+def N3Write_5c_1V1_1V : SchedWriteRes<[N3UnitV1, N3UnitV]> {
+    let Latency = 5;
+    let NumMicroOps = 2;
+}
+
+def N3Write_8c_1M0_1V : SchedWriteRes<[N3UnitM0, N3UnitV]> {
+    let Latency = 8;
+    let NumMicroOps = 2;
+}
+
+def N3Write_5c_1M_1L : SchedWriteRes<[N3UnitM, N3UnitL]> {
+    let Latency = 5;
+    let NumMicroOps = 2;
+}
+
+def N3Write_7c_1V_1V1 : SchedWriteRes<[N3UnitV, N3UnitV1]> {
+    let Latency = 7;
+    let NumMicroOps = 2;
+}
+
+def N3Write_5c_1V_1V1 : SchedWriteRes<[N3UnitV, N3UnitV1]> { // Same units as N3Write_5c_1V1_1V, listed in the opposite order.
+    let Latency = 5;
+    let NumMicroOps = 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 3 micro-op types. Names follow N3Write_<latency>c_<count><unit>...; each record lists three resources.
+
+def N3Write_3c_1L01_1V_1I : SchedWriteRes<[N3UnitL01, N3UnitV, N3UnitI]> {
+    let Latency = 3;
+    let NumMicroOps = 3;
+}
+
+def N3Write_2c_1L01_1V_1I : SchedWriteRes<[N3UnitL01, N3UnitV, N3UnitI]> {
+    let Latency = 2;
+    let NumMicroOps = 3;
+}
+
+def N3Write_6c_3V : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV]> {
+    let Latency = 6;
+    let NumMicroOps = 3;
+}
+
+def N3Write_4c_3V : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV]> {
+    let Latency = 4;
+    let NumMicroOps = 3;
+}
+
+def N3Write_6c_3L : SchedWriteRes<[N3UnitL, N3UnitL, N3UnitL]> {
+    let Latency = 6;
+    let NumMicroOps = 3;
+}
+
+def N3Write_8c_2L_1V : SchedWriteRes<[N3UnitL, N3UnitL, N3UnitV]> {
+    let Latency = 8;
+    let NumMicroOps = 3;
+}
+
+def N3Write_7c_1M_1M0_1V : SchedWriteRes<[N3UnitM, N3UnitM0, N3UnitV]> { // Uses both the M group and the specific M0 unit.
+    let Latency = 7;
+    let NumMicroOps = 3;
+}
+
+def N3Write_5c_1M_1L_1I : SchedWriteRes<[N3UnitM, N3UnitL, N3UnitI]> {
+    let Latency = 5;
+    let NumMicroOps = 3;
+}
+
+def N3Write_4c_1I_2L : SchedWriteRes<[N3UnitI, N3UnitL, N3UnitL]> {
+    let Latency = 4;
+    let NumMicroOps = 3;
+}
+
+def N3Write_1c_1L01_1D_1I : SchedWriteRes<[N3UnitL01, N3UnitD, N3UnitI]> {
+    let Latency = 1;
+    let NumMicroOps = 3;
+}
+
+def N3Write_2c_1L01_1I_1V : SchedWriteRes<[N3UnitL01, N3UnitI, N3UnitV]> { // Same units as N3Write_2c_1L01_1V_1I, in a different order.
+    let Latency = 2;
+    let NumMicroOps = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 4 micro-op types. Names follow N3Write_<latency>c_<count><unit>...; each record lists four resources.
+
+def N3Write_8c_2V_2V1 : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV1, N3UnitV1]> { // Two entries on the V group plus two on V1 specifically.
+    let Latency = 8;
+    let NumMicroOps = 4;
+}
+
+def N3Write_6c_2I_2L : SchedWriteRes<[N3UnitI, N3UnitI, N3UnitL, N3UnitL]> {
+    let Latency = 6;
+    let NumMicroOps = 4;
+}
+
+def N3Write_6c_4V0 : SchedWriteRes<[N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0]> {
+    let Latency = 6;
+    let NumMicroOps = 4;
+}
+
+def N3Write_8c_4V0 : SchedWriteRes<[N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0]> {
+    let Latency = 8;
+    let NumMicroOps = 4;
+}
+
+def N3Write_10c_4V0 : SchedWriteRes<[N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0]> {
+    let Latency = 10;
+    let NumMicroOps = 4;
+}
+
+def N3Write_6c_4V : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV, N3UnitV]> {
+    let Latency = 6;
+    let NumMicroOps = 4;
+}
+
+def N3Write_7c_4L : SchedWriteRes<[N3UnitL, N3UnitL, N3UnitL, N3UnitL]> {
+    let Latency = 7;
+    let NumMicroOps = 4;
+}
+
+def N3Write_2c_2L01_2V : SchedWriteRes<[N3UnitL01, N3UnitL01, N3UnitV, N3UnitV]> {
+    let Latency = 2;
+    let NumMicroOps = 4;
+}
+
+def N3Write_4c_2V_2L01 : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitL01, N3UnitL01]> {
+    let Latency = 4;
+    let NumMicroOps = 4;
+}
+
+def N3Write_2c_2V_2L01 : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitL01, N3UnitL01]> { // Same units as N3Write_2c_2L01_2V, listed in the opposite order.
+    let Latency = 2;
+    let NumMicroOps = 4;
+}
+
+def N3Write_8c_4V : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV, N3UnitV]> {
+    let Latency = 8;
+    let NumMicroOps = 4;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 6 micro-op types. Names follow N3Write_<latency>c_<count><unit>...; each record lists six resources.
+
+def N3Write_4c_3V_3L01 : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV,
+                                        N3UnitL01, N3UnitL01, N3UnitL01]> {
+    let Latency = 4;
+    let NumMicroOps = 6;
+}
+
+def N3Write_2c_3V_3L01 : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV,
+                                        N3UnitL01, N3UnitL01, N3UnitL01]> {
+    let Latency = 2;
+    let NumMicroOps = 6;
+}
+
+def N3Write_4c_3L01_3V : SchedWriteRes<[N3UnitL01, N3UnitL01, N3UnitL01,
+                                        N3UnitV, N3UnitV, N3UnitV]> { // Same units as N3Write_4c_3V_3L01, listed in the opposite order.
+    let Latency = 4;
+    let NumMicroOps = 6;
+}
+
+def N3Write_3c_3L01_3V : SchedWriteRes<[N3UnitL01, N3UnitL01, N3UnitL01,
+                                        N3UnitV, N3UnitV, N3UnitV]> {
+    let Latency = 3;
+    let NumMicroOps = 6;
+}
+
+def N3Write_6c_3L01_3V : SchedWriteRes<[N3UnitL01, N3UnitL01, N3UnitL01,
+                                        N3UnitV, N3UnitV, N3UnitV]> {
+    let Latency = 6;
+    let NumMicroOps = 6;
+}
+
+def N3Write_8c_3L_3V : SchedWriteRes<[N3UnitL, N3UnitL, N3UnitL,
+                                      N3UnitV, N3UnitV, N3UnitV]> { // Uses the L group here, not the L01 unit.
+    let Latency = 8;
+    let NumMicroOps = 6;
+}
+
+def N3Write_10c_3L_3V : SchedWriteRes<[N3UnitL, N3UnitL, N3UnitL,
+                                       N3UnitV, N3UnitV, N3UnitV]> {
+    let Latency = 10;
+    let NumMicroOps = 6;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 7 micro-op types. Names follow N3Write_<latency>c_<count><unit>...
+
+def N3Write_8c_4L_3V : SchedWriteRes<[N3UnitL, N3UnitL, N3UnitL, N3UnitL,
+                                      N3UnitV, N3UnitV, N3UnitV]> { // Four L-group entries plus three V-group entries.
+    let Latency = 8;
+    let NumMicroOps = 7;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 8 micro-op types. Names follow N3Write_<latency>c_<count><unit>...; each record lists eight resources.
+
+def N3Write_12c_8V0 : SchedWriteRes<[N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0,
+                                     N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0]> { // All eight entries name the single V0 unit.
+    let Latency = 12;
+    let NumMicroOps = 8;
+}
+
+def N3Write_4c_4V_4L01 : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV, N3UnitV,
+                                        N3UnitL01, N3UnitL01, N3UnitL01, N3UnitL01]> {
+    let Latency = 4;
+    let NumMicroOps = 8;
+}
+
+def N3Write_8c_8V0 : SchedWriteRes<[N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0,
+                                    N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0]> {
+    let Latency = 8;
+    let NumMicroOps = 8;
+}
+
+def N3Write_16c_8V : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV, N3UnitV,
+                                    N3UnitV, N3UnitV, N3UnitV, N3UnitV]> {
+    let Latency = 16;
+    let NumMicroOps = 8;
+}
+
+def N3Write_3c_4L01_4V : SchedWriteRes<[N3UnitL01, N3UnitL01, N3UnitL01, N3UnitL01,
+                                        N3UnitV, N3UnitV, N3UnitV, N3UnitV]> {
+    let Latency = 3;
+    let NumMicroOps = 8;
+}
+
+def N3Write_8c_4L_4V : SchedWriteRes<[N3UnitL, N3UnitL, N3UnitL, N3UnitL,
+                                      N3UnitV, N3UnitV, N3UnitV, N3UnitV]> { // Uses the L group here, not the L01 unit.
+    let Latency = 8;
+    let NumMicroOps = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 9 micro-op types. Names follow N3Write_<latency>c_<count><unit>...
+
+def N3Write_10c_6V_3L : SchedWriteRes<[N3UnitV, N3UnitV, N3UnitV,
+                                       N3UnitV, N3UnitV, N3UnitV,
+                                       N3UnitL, N3UnitL, N3UnitL]> { // Six V-group entries plus three L-group entries.
+    let Latency = 10;
+    let NumMicroOps = 9;
+}
+
+def N3Write_4c_3L01_3I_3V : SchedWriteRes<[N3UnitL01, N3UnitL01, N3UnitL01,
----------------
davemgreen wrote:

Oh I see. They might be wrong too, but I'm just guessing from what seems plausible and we don't have any official documentation one way or the other. Let's keep it like this then for the moment, and if we need to change it we can change them all.

Are you happy for me to hit submit?

https://github.com/llvm/llvm-project/pull/106371


More information about the llvm-commits mailing list