[llvm] [AArch64] C1-Ultra Scheduling model (PR #182251)

Nashe Mncube via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 16 04:45:25 PDT 2026


================
@@ -0,0 +1,2409 @@
+//=- AArch64SchedC1Ultra.td - C1 Ultra Scheduling Defs --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the C1 Ultra processors.
+// Information is taken from the C1 Ultra Software Optimization Guide:
+// 
+// https://developer.arm.com/documentation/111079/3-0
+//
+//===----------------------------------------------------------------------===//
+
+def C1UltraModel : SchedMachineModel {
+  let IssueWidth            =  10; // Max micro-ops the scheduler may issue per cycle.
+  let MicroOpBufferSize     = 320; // Entries in micro-op re-order buffer. NOTE: Copied from Neoverse-V3
+  let LoadLatency           =   4; // Optimistic load latency. NOTE: Copied from Neoverse-V3
+  let MispredictPenalty     =  10; // Extra cycles for mispredicted branch.  NOTE: Copied from N3.
+  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
+  let CompleteModel         =   1; // Every supported instruction is expected to have scheduling info.
+
+  list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
+                                                    [HasSVE2p1, HasSVEB16B16,
+                                                     HasCPA, HasCSSC]); // Features this model does not describe.
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on C1 Ultra.
+// Instructions are first fetched and then decoded into internal macro-ops
+// (MOPs). From there, the MOPs proceed through register renaming and dispatch
+// stages. A MOP can be split into two micro-ops further down the pipeline
+// after the decode stage.
+
+let SchedModel = C1UltraModel in {
+
+// Define the issue ports.
+def C1UUnitB      : ProcResource<3>;  // Branch 0/1/2
+def C1UUnitS0     : ProcResource<1>;  // Integer single-cycle 0
+def C1UUnitS1     : ProcResource<1>;  // Integer single-cycle 1
+def C1UUnitS2     : ProcResource<1>;  // Integer single-cycle 2
+def C1UUnitS3     : ProcResource<1>;  // Integer single-cycle 3
+def C1UUnitS4     : ProcResource<1>;  // Integer single-cycle 4
+def C1UUnitS5     : ProcResource<1>;  // Integer single-cycle 5
+def C1UUnitM0     : ProcResource<1>;  // Integer single/multicycle 0
+def C1UUnitM1     : ProcResource<1>;  // Integer single/multicycle 1
+def C1UUnitV0     : ProcResource<1>;  // FP/ASIMD 0
+def C1UUnitV1     : ProcResource<1>;  // FP/ASIMD 1
+def C1UUnitV2     : ProcResource<1>;  // FP/ASIMD 2
+def C1UUnitV3     : ProcResource<1>;  // FP/ASIMD 3
+def C1UUnitV4     : ProcResource<1>;  // FP/ASIMD 4
+def C1UUnitV5     : ProcResource<1>;  // FP/ASIMD 5
+def C1UUnitLS0    : ProcResource<1>;  // Load/Store 0
+def C1UUnitLS1    : ProcResource<1>;  // Load/Store 1
+def C1UUnitL23    : ProcResource<2>;  // Load 2/3
+def C1UUnitD      : ProcResource<2>;  // Store data 0/1
+def C1UUnitCME    : ProcResource<1>;  // CME operations block
+def C1UUnitFlg    : ProcResource<4>;  // Flags
+
+def C1UUnitS      : ProcResGroup<[C1UUnitS0, C1UUnitS1, C1UUnitS2, C1UUnitS3,
+                                  C1UUnitS4, C1UUnitS5]>;
+def C1UUnitI      : ProcResGroup<[C1UUnitS0, C1UUnitS1, C1UUnitS2, C1UUnitS3,
+                                  C1UUnitS4, C1UUnitS5, C1UUnitM0, C1UUnitM1]>;
+def C1UUnitI4     : ProcResGroup<[C1UUnitS0, C1UUnitS2, C1UUnitS4, C1UUnitM0]>;
+def C1UUnitM      : ProcResGroup<[C1UUnitM0, C1UUnitM1]>;
+def C1UUnitL      : ProcResGroup<[C1UUnitLS0, C1UUnitLS1, C1UUnitL23]>;
+def C1UUnitSA     : ProcResGroup<[C1UUnitLS0, C1UUnitLS1]>;
+def C1UUnitV      : ProcResGroup<[C1UUnitV0, C1UUnitV1, C1UUnitV2, 
+                                  C1UUnitV3, C1UUnitV4, C1UUnitV5]>;
+def C1UUnitV01    : ProcResGroup<[C1UUnitV0, C1UUnitV1]>;
+def C1UUnitV02    : ProcResGroup<[C1UUnitV0, C1UUnitV2]>;
+def C1UUnitV13    : ProcResGroup<[C1UUnitV1, C1UUnitV3]>;
+def C1UUnitV0123  : ProcResGroup<[C1UUnitV0, C1UUnitV1, 
+                                  C1UUnitV2, C1UUnitV3]>;
+def C1UUnitV0134  : ProcResGroup<[C1UUnitV0, C1UUnitV1, C1UUnitV3, C1UUnitV4]>;
+
+// Define commonly used read types.
+def : ReadAdvance<ReadI,       0>;
+def : ReadAdvance<ReadISReg,   0>;
+def : ReadAdvance<ReadIEReg,   0>;
+def : ReadAdvance<ReadIM,      0>;
+def : ReadAdvance<ReadIMA,     0>;
+def : ReadAdvance<ReadID,      0>;
+def : ReadAdvance<ReadExtrHi,  0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST,      0>;
+def : ReadAdvance<ReadVLD,     0>;
+
+def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint,    []> { let Latency = 1; }
+def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to C1 Ultra.
+// TODO: Flesh out with C1 Ultra latencies and port usage.
+//===----------------------------------------------------------------------===//
+
+// Define generic 0 micro-op types.
+def C1UWrite_0c : SchedWriteRes<[]> { let Latency = 0; }
+
+// Define a small set of generic 1 micro-op types as placeholders.
+def C1UWrite_1c_1B      : SchedWriteRes<[C1UUnitB]>  { let Latency = 1; }
+def C1UWrite_2c_1B      : SchedWriteRes<[C1UUnitB]>  { let Latency = 2; }
+def C1UWrite_1c_1I      : SchedWriteRes<[C1UUnitI]>  { let Latency = 1; }
+def C1UWrite_2c_1I      : SchedWriteRes<[C1UUnitI]>  { let Latency = 2; }
+def C1UWrite_2c_1I4     : SchedWriteRes<[C1UUnitI4]> { let Latency = 2; }
+def C1UWrite_1c_1M      : SchedWriteRes<[C1UUnitM]>  { let Latency = 1; }
+def C1UWrite_2c_1M      : SchedWriteRes<[C1UUnitM]>  { let Latency = 2; }
+def C1UWrite_2c_1M0     : SchedWriteRes<[C1UUnitM0]> { let Latency = 2; }
+def C1UWrite_3c_1M0     : SchedWriteRes<[C1UUnitM0]> { let Latency = 3; }
+def C1UWrite_4c_1M0     : SchedWriteRes<[C1UUnitM0]> { let Latency = 4; }
+def C1UWrite_12c_1M0    : SchedWriteRes<[C1UUnitM0]>  { let Latency = 12;
+                                                        let ReleaseAtCycles = [12]; }
+def C1UWrite_20c_1M0    : SchedWriteRes<[C1UUnitM0]>  { let Latency = 20;
+                                                        let ReleaseAtCycles = [20]; }
+def C1UWrite_1c_1V      : SchedWriteRes<[C1UUnitV]>  { let Latency = 1; }
+def C1UWrite_2c_1V      : SchedWriteRes<[C1UUnitV]>  { let Latency = 2; }
+def C1UWrite_3c_1V      : SchedWriteRes<[C1UUnitV]>  { let Latency = 3; }
+def C1UWrite_4c_1V      : SchedWriteRes<[C1UUnitV]>  { let Latency = 4; }
+def C1UWrite_6c_1V      : SchedWriteRes<[C1UUnitV]>  { let Latency = 6; }
+def C1UWrite_8c_1V      : SchedWriteRes<[C1UUnitV]>  { let Latency = 8; }
+def C1UWrite_1c_1V0     : SchedWriteRes<[C1UUnitV0]> { let Latency = 1; }
+def C1UWrite_2c_1V0     : SchedWriteRes<[C1UUnitV0]> { let Latency = 2; }
+def C1UWrite_4c_1V0     : SchedWriteRes<[C1UUnitV0]> { let Latency = 4; }
+def C1UWrite_6c_1V0     : SchedWriteRes<[C1UUnitV0]> { let Latency = 6; }
+def C1UWrite_10c_1V0    : SchedWriteRes<[C1UUnitV0]> { let Latency = 10;}
+def C1UWrite_12c_1V0    : SchedWriteRes<[C1UUnitV0]> { let Latency = 12; }
+def C1UWrite_20c_1V0    : SchedWriteRes<[C1UUnitV0]> { let Latency = 20; }
+def C1UWrite_3c_1V1     : SchedWriteRes<[C1UUnitV1]> { let Latency = 3; }
+def C1UWrite_5c_1V1     : SchedWriteRes<[C1UUnitV1]> { let Latency = 5; }
+def C1UWrite_8c_1V1     : SchedWriteRes<[C1UUnitV1]> { let Latency = 8; }
+def C1UWrite_12c_1V1    : SchedWriteRes<[C1UUnitV1]> { let Latency = 12; }
+def C1UWrite_2c_V01     : SchedWriteRes<[C1UUnitV01]>  { let Latency = 2; }
+def C1UWrite_3c_V01     : SchedWriteRes<[C1UUnitV01]>  { let Latency = 3; }
+def C1UWrite_2c_V0134   : SchedWriteRes<[C1UUnitV0134 ]>  { let Latency = 2; }
+def C1UWrite_3c_V0134   : SchedWriteRes<[C1UUnitV0134]>  { let Latency = 3; }
+def C1UWrite_3c_2V0134   : SchedWriteRes<[C1UUnitV0134, C1UUnitV0134]>  { let Latency = 3; }
+def C1UWrite_4c_V0134   : SchedWriteRes<[C1UUnitV0134]>  { let Latency = 4; }
+def C1UWrite_4c_2V0134   : SchedWriteRes<[C1UUnitV0134, C1UUnitV0134]>  { let Latency = 4; }
+def C1UWrite_6c_V0134   : SchedWriteRes<[C1UUnitV0134]>  { let Latency = 6; }
+def C1UWrite_6c_4V0134   : SchedWriteRes<[C1UUnitV0134, C1UUnitV0134, C1UUnitV0134, C1UUnitV0134]>  { let Latency = 6;}
+def C1UWrite_1c_1L      : SchedWriteRes<[C1UUnitL]>  { let Latency = 1; }
+def C1UWrite_4c_1L      : SchedWriteRes<[C1UUnitL]>  { let Latency = 4; }
+def C1UWrite_6c_1L      : SchedWriteRes<[C1UUnitL]>  { let Latency = 6; }
+def C1UWrite_1c_1SA     : SchedWriteRes<[C1UUnitSA]> { let Latency = 1; }
+
+def C1UWrite_2c_1B_1S : SchedWriteRes<[C1UUnitB, C1UUnitS]> {
+  let Latency     = 2;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_1c_1I_1Flg : SchedWriteRes<[C1UUnitI, C1UUnitFlg]> { let Latency = 1; }
+def C1UWrite_2c_1I_1Flg : SchedWriteRes<[C1UUnitI, C1UUnitFlg]> { let Latency = 2; }
+
+def C1UWrite_1c_1SA_1D : SchedWriteRes<[C1UUnitSA, C1UUnitD]> {
+  let Latency     = 1;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_1c_1SA_1D_1I : SchedWriteRes<[C1UUnitSA, C1UUnitD, C1UUnitI]> {
+  let Latency     = 1;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_2c_1SA_V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01]> { let Latency = 2; }
+def C1UWrite_2c_2SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitSA, C1UUnitV01, C1UUnitV01]> { let Latency = 2; }
+def C1UWrite_2c_3SA_3V01 : SchedWriteRes<[C1UUnitSA, C1UUnitSA, C1UUnitSA,
+                                         C1UUnitV01, C1UUnitV01, C1UUnitV01]> { let Latency = 2; }
+def C1UWrite_2c_4SA_4V01 : SchedWriteRes<[C1UUnitSA, C1UUnitSA, C1UUnitSA, C1UUnitSA,
+                                          C1UUnitV01, C1UUnitV01, C1UUnitV01, C1UUnitV01]> { let Latency = 2; }
+def C1UWrite_2c_1SA_V01_1I : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitI]> { let Latency = 2; }
+
+def C1UWrite_2c_1I_1M : SchedWriteRes<[C1UUnitI, C1UUnitM]> {
+  let Latency     = 2;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_2c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+  let Latency     = 2;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_3c_1I_1M : SchedWriteRes<[C1UUnitI, C1UUnitM]> {
+  let Latency     = 3;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_4c_2L    : SchedWriteRes<[C1UUnitL, C1UUnitL]> { let Latency = 4; }
+def C1UWrite_4c_2L_1I : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitI]> { let Latency = 4; }
+
+def C1UWrite_5c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+  let Latency     = 5;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_5c_2L_1I : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitI]> { let Latency = 5; }
+
+def C1UWrite_6c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+  let Latency     = 6;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_6c_2L_1I : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitI]> { let Latency     = 6; }
+
+def C1UWrite_7c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+  let Latency     = 7;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_5c_1M0_1V : SchedWriteRes<[C1UUnitM0, C1UUnitV]> {
+  let Latency     = 5;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_4c_V0134_1V : SchedWriteRes<[C1UUnitV0134, C1UUnitV]> { let Latency = 4; }
+def C1UWrite_4c_2V0134_1V : SchedWriteRes<[C1UUnitV0134, C1UUnitV0134, C1UUnitV]> { let Latency = 4; }
+
+def C1UWrite_8c_V1_4rc : SchedWriteRes<[C1UUnitV1]> {
+  let Latency = 8;
+  let ReleaseAtCycles = [4];
+}
+
+def C1UWrite_9c_V1_2rc : SchedWriteRes<[C1UUnitV1]> {
+  let Latency = 9;
+  let ReleaseAtCycles = [2];
+}
+
+def C1UWrite_12c_V1_8rc : SchedWriteRes<[C1UUnitV1]> {
+  let Latency = 12;
+  let ReleaseAtCycles = [8];
+}
+
+def C1UWrite_11c_V1_4rc : SchedWriteRes<[C1UUnitV1]> {
+  let Latency = 11;
+  let ReleaseAtCycles = [4];
+}
+
+def C1UWrite_13c_V1_2rc : SchedWriteRes<[C1UUnitV1]> {
+  let Latency = 13;
+  let ReleaseAtCycles = [2];
+}
+
+def C1UWrite_6c_1M0_1B : SchedWriteRes<[C1UUnitM0, C1UUnitB]> { 
+  let Latency = 6; 
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_6c_1M0_1B_1I : SchedWriteRes<[C1UUnitM0, C1UUnitB, C1UUnitI]> {
+  let Latency = 6;
+  let NumMicroOps = 3; // One micro-op per listed resource (M0, B, I), matching sibling defs.
+}
+
+def C1UWrite_9c_1M0_1L : SchedWriteRes<[C1UUnitM0, C1UUnitL]> { 
+  let Latency = 9;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_6c_2L : SchedWriteRes<[C1UUnitL, C1UUnitL]> {
+  let Latency     = 6;
+  let NumMicroOps = 2;
+}
+
+
+def C1UWrite_6c_3L : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL]> {
+  let Latency     = 6;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_7c_4L : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL, C1UUnitL]> {
+  let Latency     = 7;
+  let NumMicroOps = 4;
+}
+
+def C1UWrite_8c_1L_1V : SchedWriteRes<[C1UUnitL, C1UUnitV]> {
+  let Latency     = 8;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_6c_1L_1V : SchedWriteRes<[C1UUnitL, C1UUnitV]> {
+  let Latency     = 6;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_8c_1L_2V : SchedWriteRes<[C1UUnitL, C1UUnitV, C1UUnitV]> {
+  let Latency     = 8;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_8c_2L_2V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitV, C1UUnitV]> {
+  let Latency     = 8;
+  let NumMicroOps = 4;
+}
+
+def C1UWrite_8c_2L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitV, C1UUnitV,
+                                      C1UUnitV]> {
+  let Latency     = 8;
+  let NumMicroOps = 5;
+}
+
+def C1UWrite_8C_3L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+                                      C1UUnitV, C1UUnitV, C1UUnitV]> {
+  let Latency     = 8; // NOTE(review): same body as C1UWrite_8c_3L_3V; only the 'C' case differs - confirm both are needed.
+  let NumMicroOps = 6; // Three load micro-ops plus three vector micro-ops.
+}
+
+def C1UWrite_9c_3L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+                                      C1UUnitV, C1UUnitV, C1UUnitV]> {
+  let Latency     = 9;
+  let NumMicroOps = 6;
+}
+
+def C1UWrite_8c_3L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+                                      C1UUnitV, C1UUnitV, C1UUnitV]> {
+  let Latency     = 8;
+  let NumMicroOps = 6;
+}
+
+def C1UWrite_9c_6L_4V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL, C1UUnitL,
+                                      C1UUnitL, C1UUnitL, C1UUnitV, C1UUnitV,
+                                      C1UUnitV, C1UUnitV]> {
+  let Latency     = 9;
+  let NumMicroOps = 10;
+}
+
+def C1UWrite_8c_3L_4V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+                                      C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV]> {
+  let Latency     = 8;
+  let NumMicroOps = 7;
+}
+
+
+def C1UWrite_8c_4L_4V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL, C1UUnitL,
+                                      C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV]> {
+  let Latency = 8;
+  let NumMicroOps = 8;
+}
+
+def C1UWrite_4c_1SA_V01_V : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV]> { let Latency     = 4; }
+def C1UWrite_4c_2SA_2V01_2V : SchedWriteRes<[C1UUnitSA, C1UUnitSA, C1UUnitV01, C1UUnitV01,
+                                             C1UUnitV, C1UUnitV]> { let Latency     = 4; }
+def C1UWrite_5c_1SA_V01_V : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV]> { let Latency     = 5; }
+def C1UWrite_5c_4SA_4V01_4V : SchedWriteRes<[C1UUnitSA, C1UUnitSA, C1UUnitSA, C1UUnitSA,
+                                             C1UUnitV01, C1UUnitV01, C1UUnitV01, C1UUnitV01,
+                                             C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV]> { let Latency     = 5; }
+
+def C1UWrite_6c_1SA_V01_V: SchedWriteRes<[C1UUnitSA,C1UUnitV01, C1UUnitV]> { let Latency     = 6; }
+
+
+def C1UWrite_6c_6SA_6V01_6V: SchedWriteRes<[C1UUnitSA, C1UUnitSA, C1UUnitSA,
+                                            C1UUnitSA, C1UUnitSA, C1UUnitSA,
+                                            C1UUnitV01, C1UUnitV01, C1UUnitV01,
+                                            C1UUnitV01, C1UUnitV01, C1UUnitV01,
+                                            C1UUnitV, C1UUnitV, C1UUnitV,
+                                            C1UUnitV, C1UUnitV, C1UUnitV ]> { let Latency     = 6; }
+
+def C1UWrite_6c_8SA_8V01_8V: SchedWriteRes<[C1UUnitSA, C1UUnitSA, C1UUnitSA, C1UUnitSA,
+                                            C1UUnitSA, C1UUnitSA, C1UUnitSA, C1UUnitSA,
+                                            C1UUnitV01, C1UUnitV01, C1UUnitV01, C1UUnitV01,
+                                            C1UUnitV01, C1UUnitV01, C1UUnitV01, C1UUnitV01,
+                                            C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV,
+                                            C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV ]> { let Latency     = 6; }
+
+
+def C1UWrite_7c_1SA_V01_V: SchedWriteRes<[
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+]> {
+  let Latency     = 7;
+}
+
+def C1UWrite_7c_12SA_12V01_12V: SchedWriteRes<[
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+    C1UUnitSA, C1UUnitV01, C1UUnitV,
+    C1UUnitSA, C1UUnitV01, C1UUnitV
+]> {
+  let Latency     = 7;
+}
+
+def C1UWrite_7c_1M_1M0_1V : SchedWriteRes<[C1UUnitM, C1UUnitM0, C1UUnitV]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_2V1 : SchedWriteRes<[C1UUnitV1, C1UUnitV1]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_8c_1M0_1V1_1V01 : SchedWriteRes<[C1UUnitM0, C1UUnitV1, C1UUnitV01]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_1V1_1V01 : SchedWriteRes<[C1UUnitV1, C1UUnitV01]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_7c_1M0_V0134 : SchedWriteRes<[C1UUnitM0, C1UUnitV0134]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_5c_1V_1M0 : SchedWriteRes<[C1UUnitV, C1UUnitM0]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_8c_1V_3V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV0134, C1UUnitV0134, C1UUnitV0134]> { let Latency = 8; }
+
+def C1UWrite_6c_1V_2V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV0134, C1UUnitV0134]> {
+  let Latency = 6;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_2V_1V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV, C1UUnitV0134]> {
+  let Latency = 6;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_4c_1V_1V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV0134]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_12c_1V0_8rc : SchedWriteRes<[C1UUnitV0]> {
+  let Latency = 12;
+  let ReleaseAtCycles = [8];
+}
+
+def C1UWrite_11c_1V0_4rc : SchedWriteRes<[C1UUnitV0]> {
+  let Latency = 11;
+  let ReleaseAtCycles = [4];
+}
+
+def C1UWrite_13c_1V0_2rc : SchedWriteRes<[C1UUnitV0]> {
+  let Latency = 13;           // Matches the "13c" in the def name (was 12).
+  let ReleaseAtCycles = [2];  // V0 is released after 2 cycles ("2rc").
+}
+
+def C1UWrite_12c_1V1_8rc : SchedWriteRes<[C1UUnitV1]> { // Uses V1 as the name states (was C1UUnitV0).
+  let Latency = 12;
+  let ReleaseAtCycles = [8];  // V1 is released after 8 cycles ("8rc").
+}
+
+def C1UWrite_11c_1V1_4rc : SchedWriteRes<[C1UUnitV1]> { // Uses V1 as the name states (was C1UUnitV0).
+  let Latency = 11;
+  let ReleaseAtCycles = [4];  // V1 is released after 4 cycles ("4rc").
+}
+
+def C1UWrite_13c_1V1_2rc : SchedWriteRes<[C1UUnitV1]> { // Uses V1 as the name states (was C1UUnitV0).
+  let Latency = 13;           // Matches the "13c" in the def name (was 12).
+  let ReleaseAtCycles = [2];  // V1 is released after 2 cycles ("2rc").
+}
+
+def C1UWrite_6c_1L_1M : SchedWriteRes<[C1UUnitL, C1UUnitM]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_9c_1V01_1L : SchedWriteRes<[C1UUnitV01, C1UUnitL]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_9c_1I_1L_1V : SchedWriteRes<[C1UUnitI, C1UUnitL, C1UUnitV]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_10c_1L_1V : SchedWriteRes<[C1UUnitL, C1UUnitV]> {
+  let Latency = 10;     // Matches the "10c" in the def name (was 9).
+  let NumMicroOps = 2;  // One load micro-op plus one vector micro-op.
+}
+
+def C1UWrite_11c_1I_1L_1V : SchedWriteRes<[C1UUnitI, C1UUnitL, C1UUnitV]> {
+  let Latency = 11;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_2c_1SA_1V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_4c_1I_1V_1SA_1V : SchedWriteRes<[C1UUnitI, C1UUnitV, C1UUnitSA, C1UUnitV]> {
+  let Latency = 4;
+  let NumMicroOps = 4;
+}
+
+
+def C1UWrite_5c_1I_1V_1SA_1V : SchedWriteRes<[C1UUnitI, C1UUnitV, C1UUnitSA, C1UUnitV]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+}
+
+def C1UWrite_7c_1I_1V_1SA_1V : SchedWriteRes<[C1UUnitI, C1UUnitV, C1UUnitSA, C1UUnitV]> {
+  let Latency = 7;
+  let NumMicroOps = 4;
+}
+
+def C1UWrite_2c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+  let Latency = 2;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_4c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+  let Latency = 4;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_5c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+  let Latency = 6;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_7c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+}
+
+def C1UWrite_3c_1M0_1M : SchedWriteRes<[C1UUnitM0, C1UUnitM]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+}
+
+def C1UWrite_4c_1M0_1M : SchedWriteRes<[C1UUnitM0, C1UUnitM]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+}
+
+// Forwarded types
+def C1UWr_FMA     : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_FMA     : SchedReadAdvance<2, [WriteFMul, C1UWr_FMA]>;
+
+def C1UWr_VA      : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VA      : SchedReadAdvance<3, [C1UWr_VA]>;
+
+def C1UWr_VDOT    : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1URd_VDOT    : SchedReadAdvance<2, [C1UWr_VDOT]>;
+
+def C1UWr_VMMA    : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1URd_VMMA    : SchedReadAdvance<2, [C1UWr_VMMA]>;
+
+def C1UWr_MA      : SchedWriteRes<[C1UUnitI4]> { let Latency = 3; }
+def C1URd_MA      : SchedReadAdvance<2, [C1UWr_MA]>;
+
+def C1UWr_VMA     : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_VMA     : SchedReadAdvance<3, [C1UWr_VMA]>;
+
+def C1UWr_VMAH    : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_VMAH    : SchedReadAdvance<2, [C1UWr_VMAH]>;
+
+def C1UWr_VPA     : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VPA     : SchedReadAdvance<3, [C1UWr_VPA]>;
+
+def C1UWr_VSA     : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VSA     : SchedReadAdvance<3, [C1UWr_VSA]>;
+
+def C1UWr_VFCMA   : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_VFCMA   : SchedReadAdvance<3, [C1UWr_VFCMA]>;
+
+def C1UWr_VFMA    : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VFMA    : SchedReadAdvance<2, [C1UWr_VFMA]>;
+
+def C1UWr_VBFDOT  : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VBFDOT  : SchedReadAdvance<2, [C1UWr_VBFDOT]>;
+
+def C1UWr_VBFMMA  : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_VBFMMA  : SchedReadAdvance<1, [C1UWr_VBFMMA]>;
+
+def C1UWr_VBFMAL  : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_VBFMAL  : SchedReadAdvance<3, [C1UWr_VBFMAL]>;
+
+def C1UWr_ZA      : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZA      : SchedReadAdvance<3, [C1UWr_ZA]>;
+def C1UWr_ZPA     : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZPA     : SchedReadAdvance<3, [C1UWr_ZPA]>;
+def C1UWr_ZSA     : SchedWriteRes<[C1UUnitV13]> { let Latency = 4; }
+def C1URd_ZSA     : SchedReadAdvance<3, [C1UWr_ZSA]>;
+
+def C1UWr_ZDOTB   : SchedWriteRes<[C1UUnitV]>   { let Latency = 3; }
+def C1URd_ZDOTB   : SchedReadAdvance<2, [C1UWr_ZDOTB]>;
+def C1UWr_ZDOTH   : SchedWriteRes<[C1UUnitV0134]> { let Latency = 3; }
+def C1URd_ZDOTH   : SchedReadAdvance<2, [C1UWr_ZDOTH]>;
+
+def C1UWr_ZCMABHS : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_ZCMABHS : SchedReadAdvance<3, [C1UWr_ZCMABHS]>;
+
+def C1UWr_ZMMA    : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1URd_ZMMA    : SchedReadAdvance<2, [C1UWr_ZMMA]>;
+
+def C1UWr_ZMA     : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_ZMA     : SchedReadAdvance<3, [C1UWr_ZMA]>;
+
+def C1UWr_ZMASQL  : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_ZMASQL  : SchedReadAdvance<2, [C1UWr_ZMASQL]>;
+
+def C1UWr_ZFCMA   : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZFCMA   : SchedReadAdvance<3, [C1UWr_ZFCMA]>;
+
+def C1UWr_ZFMA    : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZFMA    : SchedReadAdvance<2, [C1UWr_ZFMA]>;
+
+def C1UWr_ZFMAL   : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZFMAL   : SchedReadAdvance<3, [C1UWr_ZFMAL]>;
+
+
+def C1UWr_ZBFDOT  : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZBFDOT  : SchedReadAdvance<2, [C1UWr_ZBFDOT]>;
+def C1UWr_ZBFMMA  : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZBFMMA  : SchedReadAdvance<2, [C1UWr_ZBFMMA]>;
+def C1UWr_ZBFMAL  : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZBFMAL  : SchedReadAdvance<3, [C1UWr_ZBFMAL]>;
+
+def C1UWr_Pred    : SchedWriteRes<[C1UUnitM]> { let Latency = 2; }
+def C1URd_Pred    : SchedReadAdvance<1, [C1UWr_Pred]>;
+
+// Predicate controlled types
+def C1UWrite_ArithI : SchedWriteVariant<[
+                       SchedVar<IsCheapLSL,  [C1UWrite_1c_1I]>,
+                       SchedVar<NoSchedPred, [C1UWrite_2c_1I]>]>;
+
+def C1UWrite_Extr : SchedWriteVariant<[
+                     SchedVar<IsRORImmIdiomPred, [C1UWrite_1c_1I]>,
+                     SchedVar<NoSchedPred,       [C1UWrite_3c_1I_1M]>]>;
+
+def C1UWrite_LdrQ : SchedWriteVariant<[
+                      SchedVar<FPIsQForm,   [C1UWrite_7c_1L_1I]>,
+                      SchedVar<NoSchedPred,     [C1UWrite_6c_1L]>]>;
+
+def C1UWrite_StrQ : SchedWriteVariant<[
+                      SchedVar<FPIsQForm,   [C1UWrite_2c_1SA_V01_1I]>,
+                      SchedVar<NoSchedPred,     [C1UWrite_2c_1SA_V01]>]>;
+
+def C1UWrite_1or2c_1I_1Flg : SchedWriteVariant<[
+                      SchedVar<IsXOrWDest, [C1UWrite_1c_1I_1Flg]>,
+                      SchedVar<NoSchedPred, [C1UWrite_2c_1I_1Flg]>]>;
+
+def C1UWrite_2or3c_1V0 : SchedWriteVariant<[ // NOTE(review): name says 2-or-3 cycles, but the variants below are 1c and 2c - confirm.
+                      SchedVar<NeoversePdIsPg,  [C1UWrite_1c_1V0]>, // Chosen when NeoversePdIsPg holds.
+                      SchedVar<NoSchedPred,     [C1UWrite_2c_1V0]>]>; // Fallback when no earlier predicate matches.
+
+def C1UWrite_2or4c_1SA_2V01 : SchedWriteVariant<[
+                                SchedVar<SameZRegDstSrcPred, [C1UWrite_4c_1SA_2V01]>,
+                                SchedVar<NoSchedPred, [C1UWrite_2c_1SA_2V01]>]>;
+
+def C1UWrite_4or6c_1SA_2V01 : SchedWriteVariant<[
+                                SchedVar<SameZRegDstSrcPred, [C1UWrite_6c_1SA_2V01]>,
+                                SchedVar<NoSchedPred, [C1UWrite_4c_1SA_2V01]>]>;
+
+def C1UWrite_5or7c_1SA_2V01 : SchedWriteVariant<[
+                                SchedVar<SameZRegDstSrcPred, [C1UWrite_7c_1SA_2V01]>,
+                                SchedVar<NoSchedPred, [C1UWrite_5c_1SA_2V01]>]>;
+
+
+def C1UWrite_3or4c_1M0_1M : SchedWriteVariant<[
+                      SchedVar<NeoversePdIsPg,  [C1UWrite_4c_1M0_1M]>,
+                      SchedVar<NoSchedPred,     [C1UWrite_3c_1M0_1M]>]>;
+
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction scheduling classes.
+//===----------------------------------------------------------------------===//
+
+// Branch instructions
+// -----------------------------------------------------------------------------
+
+// Branch, immed
+// Compare and branch
+def : SchedAlias<WriteBr,    C1UWrite_2c_1B>;
+
+// Branch, register
+def : SchedAlias<WriteBrReg, C1UWrite_2c_1B>;
+
+// Branch and link, immed
+// Branch and link, register
+def : InstRW<[C1UWrite_2c_1B_1S], (instrs BL, BLR)>;
+
+
+// Arithmetic and logical operations
+// -----------------------------------------------------------------------------
+
+// ALU, basic
+def : SchedAlias<WriteI, C1UWrite_1c_1I>;
+
+// ALU, basic, flagset
+def : InstRW<[C1UWrite_1c_1I_1Flg],
+             (instregex "^(ADD|SUB)S[WX]r[ir]$",
+                        "^(ADC|SBC)S[WX]r$",
+                        "^ANDS[WX]ri$",
+                        "^(AND|BIC)S[WX]rr$")>;
+
+// ALU, extend and shift
+def : SchedAlias<WriteIEReg, C1UWrite_1or2c_1I_1Flg>;
+
+// Arithmetic, LSL shift, shift <= 4
+// Arithmetic, flagset, LSL shift, shift <= 4
+// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
+def : SchedAlias<WriteISReg, C1UWrite_ArithI>;
+
+
+// Arithmetic, immediate to logical address tag
+def : InstRW<[C1UWrite_2c_1I], (instrs ADDG, SUBG)>;
+
+// Conditional compare
+def : InstRW<[C1UWrite_1c_1I_1Flg], (instregex "^CCM[NP][WX][ir]")>;
+
+// Conditional select
+def : InstRW<[C1UWrite_1c_1I_1Flg],
+              (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)[ir]")>;
+
+// Convert floating-point condition flags
+// Flag manipulation instructions
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+// Insert Random Tags
+def : InstRW<[C1UWrite_2c_1M], (instrs IRG, IRGstack)>;
+
+// Insert Tag Mask
+// Subtract Pointer
+def : InstRW<[C1UWrite_1c_1I], (instrs GMI, SUBP)>;
+
+// Subtract Pointer, flagset
+def : InstRW<[C1UWrite_1c_1I_1Flg], (instrs SUBPS)>;
+
+// Logical, shift, no flagset
+def : InstRW<[C1UWrite_1c_1I],
+             (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs",
+                        "^ORR[WX]rs$")>;
+
+// Logical, shift, flagset
+def : InstRW<[C1UWrite_1c_1I_1Flg], (instregex "^(AND|BIC)S[WX]rs$")>;
+
+// Move and shift instructions
+// -----------------------------------------------------------------------------
+
+def : SchedAlias<WriteImm, C1UWrite_1c_1I>;
+
+// Divide and multiply instructions
+// -----------------------------------------------------------------------------
+
+// SDIV, UDIV
+def : SchedAlias<WriteID32,  C1UWrite_12c_1M0>;
+def : SchedAlias<WriteID64,  C1UWrite_20c_1M0>;
+
+def : SchedAlias<WriteIM32, C1UWrite_2c_1M>;
+def : SchedAlias<WriteIM64, C1UWrite_2c_1M>;
+
+// Multiply accumulate, W-form
+// Multiply accumulate, X-form
+def : InstRW<[C1UWr_MA, C1URd_MA], (instregex "^M(ADD|SUB)[WX]rrr$")>;
+
+// Multiply accumulate long
+def : InstRW<[C1UWr_MA, C1URd_MA], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
----------------
nasherm wrote:

Done

https://github.com/llvm/llvm-project/pull/182251


More information about the llvm-commits mailing list