[llvm] [AArch64] C1-Ultra Scheduling model (PR #182251)

Wed Mar 4 09:14:36 PST 2026

================
@@ -0,0 +1,2370 @@
+//=- AArch64SchedC1Ultra.td - C1 Ultra Scheduling Defs --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the C1 Ultra processors.
+// Information is taken from the C1 Ultra Software Optimization Guide:
+//
+// https://developer.arm.com/documentation/111079/3-0
+//
+//===----------------------------------------------------------------------===//
+
+def C1UltraModel : SchedMachineModel {
+  let IssueWidth            =  10; // NOTE(review): origin of this value is not stated here; confirm.
+  let MicroOpBufferSize     = 320; // Entries in micro-op re-order buffer. NOTE: Copied from Neoverse-V3.
+  let LoadLatency           =   4; // Optimistic load latency. NOTE: Copied from Neoverse-V3.
+  let MispredictPenalty     =  10; // Extra cycles for mispredicted branch. NOTE: Copied from N3 (presumably Neoverse-N3; TODO confirm).
+  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
+  let CompleteModel         =   1;
+
+  list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
+                                                    [HasSVE2p1, HasSVEB16B16,
+                                                     HasCPA, HasCSSC]); // Appended to SMEUnsupported.F.
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on C1 Ultra.
+// Instructions are first fetched and then decoded into internal macro-ops
+// (MOPs). From there, the MOPs proceed through register renaming and dispatch
+// stages. A MOP can be split into two micro-ops further down the pipeline
+// after the decode stage.
+
+let SchedModel = C1UltraModel in {
+
+// Define the issue ports.
+def C1UUnitB      : ProcResource<3>;  // Branch 0/1/2
+def C1UUnitS0     : ProcResource<1>;  // Integer single-cycle 0
+def C1UUnitS1     : ProcResource<1>;  // Integer single-cycle 1
+def C1UUnitS2     : ProcResource<1>;  // Integer single-cycle 2
+def C1UUnitS3     : ProcResource<1>;  // Integer single-cycle 3
+def C1UUnitS4     : ProcResource<1>;  // Integer single-cycle 4
+def C1UUnitS5     : ProcResource<1>;  // Integer single-cycle 5
+def C1UUnitM0     : ProcResource<1>;  // Integer single/multicycle 0
+def C1UUnitM1     : ProcResource<1>;  // Integer single/multicycle 1
+def C1UUnitV0     : ProcResource<1>;  // FP/ASIMD 0
+def C1UUnitV1     : ProcResource<1>;  // FP/ASIMD 1
+def C1UUnitV2     : ProcResource<1>;  // FP/ASIMD 2
+def C1UUnitV3     : ProcResource<1>;  // FP/ASIMD 3
+def C1UUnitV4     : ProcResource<1>;  // FP/ASIMD 4
+def C1UUnitV5     : ProcResource<1>;  // FP/ASIMD 5
+def C1UUnitLS0    : ProcResource<1>;  // Load/Store 0
+def C1UUnitLS1    : ProcResource<1>;  // Load/Store 1
+def C1UUnitL23    : ProcResource<2>;  // Load 2/3
+def C1UUnitD      : ProcResource<2>;  // Store data 0/1
+def C1UUnitCME    : ProcResource<1>;  // CME operations block
+def C1UUnitFlg    : ProcResource<4>;  // Flags
+
+def C1UUnitS      : ProcResGroup<[C1UUnitS0, C1UUnitS1, C1UUnitS2, C1UUnitS3,
+                                  C1UUnitS4, C1UUnitS5]>;
+def C1UUnitI      : ProcResGroup<[C1UUnitS0, C1UUnitS1, C1UUnitS2, C1UUnitS3,
+                                  C1UUnitS4, C1UUnitS5, C1UUnitM0, C1UUnitM1]>;
+def C1UUnitI4     : ProcResGroup<[C1UUnitS0, C1UUnitS2, C1UUnitS4, C1UUnitM0]>;
+def C1UUnitM      : ProcResGroup<[C1UUnitM0, C1UUnitM1]>;
+def C1UUnitL      : ProcResGroup<[C1UUnitLS0, C1UUnitLS1, C1UUnitL23]>;
+def C1UUnitSA     : ProcResGroup<[C1UUnitLS0, C1UUnitLS1]>;
+def C1UUnitV      : ProcResGroup<[C1UUnitV0, C1UUnitV1, C1UUnitV2, 
+                                  C1UUnitV3, C1UUnitV4, C1UUnitV5]>;
+def C1UUnitV01    : ProcResGroup<[C1UUnitV0, C1UUnitV1]>;
+def C1UUnitV02    : ProcResGroup<[C1UUnitV0, C1UUnitV2]>;
+def C1UUnitV13    : ProcResGroup<[C1UUnitV1, C1UUnitV3]>;
+def C1UUnitV0123  : ProcResGroup<[C1UUnitV0, C1UUnitV1, 
+                                  C1UUnitV2, C1UUnitV3]>;
+def C1UUnitV0134  : ProcResGroup<[C1UUnitV0, C1UUnitV1, C1UUnitV3, C1UUnitV4]>;
+
+// Define commonly used read types.
+def : ReadAdvance<ReadI,       0>;
+def : ReadAdvance<ReadISReg,   0>;
+def : ReadAdvance<ReadIEReg,   0>;
+def : ReadAdvance<ReadIM,      0>;
+def : ReadAdvance<ReadIMA,     0>;
+def : ReadAdvance<ReadID,      0>;
+def : ReadAdvance<ReadExtrHi,  0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST,      0>;
+def : ReadAdvance<ReadVLD,     0>;
+
+// NOTE: Copied from NeoverseC1U
----------------
walkerkd wrote:

NeoverseC1U? I'm guessing this should be Neoverse-V3.

https://github.com/llvm/llvm-project/pull/182251