[llvm] [AArch64] C1-Ultra Scheduling model (PR #182251)
Nashe Mncube via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 16 04:45:25 PDT 2026
================
@@ -0,0 +1,2409 @@
+//=- AArch64SchedC1Ultra.td - C1 Ultra Scheduling Defs --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the C1 Ultra processors.
+// Information is taken from the C1 Ultra Software Optimization Guide:
+//
+// https://developer.arm.com/documentation/111079/3-0
+//
+//===----------------------------------------------------------------------===//
+
+// Top-level machine model record for C1 Ultra. Several of the values below
+// are carried over from other cores, as noted on each field.
+def C1UltraModel : SchedMachineModel {
+ let IssueWidth = 10;
+ // Entries in micro-op re-order buffer. NOTE: Copied from Neoverse-V3
+ let MicroOpBufferSize = 320;
+ // Optimistic load latency. NOTE: Copied from Neoverse-V3
+ let LoadLatency = 4;
+ // Extra cycles for mispredicted branch. NOTE: Copied from N3.
+ let MispredictPenalty = 10;
+ // NOTE: Copied from Cortex-A57.
+ let LoopMicroOpBufferSize = 16;
+ let CompleteModel = 1;
+
+ // Everything in SMEUnsupported.F plus the predicates listed explicitly here.
+ list<Predicate> UnsupportedFeatures =
+ !listconcat(SMEUnsupported.F,
+ [HasSVE2p1, HasSVEB16B16, HasCPA, HasCSSC]);
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on C1 Ultra.
+// Instructions are first fetched and then decoded into internal macro-ops
+// (MOPs). From there, the MOPs proceed through register renaming and dispatch
+// stages. A MOP can be split into two micro-ops further down the pipeline
+// after the decode stage.
+
+let SchedModel = C1UltraModel in {
+
+// Define the issue ports.
+// ProcResource<N> declares N units of the named kind; the trailing comment on
+// each line names the unit(s) the record stands for (e.g. C1UUnitB covers
+// branch units 0, 1 and 2).
+def C1UUnitB : ProcResource<3>; // Branch 0/1/2
+def C1UUnitS0 : ProcResource<1>; // Integer single-cycle 0
+def C1UUnitS1 : ProcResource<1>; // Integer single-cycle 1
+def C1UUnitS2 : ProcResource<1>; // Integer single-cycle 2
+def C1UUnitS3 : ProcResource<1>; // Integer single-cycle 3
+def C1UUnitS4 : ProcResource<1>; // Integer single-cycle 4
+def C1UUnitS5 : ProcResource<1>; // Integer single-cycle 5
+def C1UUnitM0 : ProcResource<1>; // Integer single/multicycle 0
+def C1UUnitM1 : ProcResource<1>; // Integer single/multicycle 1
+def C1UUnitV0 : ProcResource<1>; // FP/ASIMD 0
+def C1UUnitV1 : ProcResource<1>; // FP/ASIMD 1
+def C1UUnitV2 : ProcResource<1>; // FP/ASIMD 2
+def C1UUnitV3 : ProcResource<1>; // FP/ASIMD 3
+def C1UUnitV4 : ProcResource<1>; // FP/ASIMD 4
+def C1UUnitV5 : ProcResource<1>; // FP/ASIMD 5
+def C1UUnitLS0 : ProcResource<1>; // Load/Store 0
+def C1UUnitLS1 : ProcResource<1>; // Load/Store 1
+def C1UUnitL23 : ProcResource<2>; // Load 2/3
+def C1UUnitD : ProcResource<2>; // Store data 0/1
+def C1UUnitCME : ProcResource<1>; // CME operations block
+def C1UUnitFlg : ProcResource<4>; // Flags
+
+// Groups of issue ports. Each group lists the individual ports it contains.
+
+// Single-cycle integer ports S0-S5.
+def C1UUnitS : ProcResGroup<[
+ C1UUnitS0, C1UUnitS1, C1UUnitS2, C1UUnitS3, C1UUnitS4, C1UUnitS5
+]>;
+// All integer ports: S0-S5 plus M0-M1.
+def C1UUnitI : ProcResGroup<[
+ C1UUnitS0, C1UUnitS1, C1UUnitS2, C1UUnitS3, C1UUnitS4, C1UUnitS5,
+ C1UUnitM0, C1UUnitM1
+]>;
+// Four-port integer subset: S0, S2, S4 and M0.
+def C1UUnitI4 : ProcResGroup<[
+ C1UUnitS0, C1UUnitS2, C1UUnitS4, C1UUnitM0
+]>;
+// Single/multicycle integer ports M0-M1.
+def C1UUnitM : ProcResGroup<[
+ C1UUnitM0, C1UUnitM1
+]>;
+// Load-capable ports: LS0, LS1 and the Load 2/3 resource.
+def C1UUnitL : ProcResGroup<[
+ C1UUnitLS0, C1UUnitLS1, C1UUnitL23
+]>;
+// Load/Store ports LS0 and LS1 only.
+def C1UUnitSA : ProcResGroup<[
+ C1UUnitLS0, C1UUnitLS1
+]>;
+// All FP/ASIMD ports V0-V5.
+def C1UUnitV : ProcResGroup<[
+ C1UUnitV0, C1UUnitV1, C1UUnitV2, C1UUnitV3, C1UUnitV4, C1UUnitV5
+]>;
+// FP/ASIMD subsets, named after the port numbers they contain.
+def C1UUnitV01 : ProcResGroup<[
+ C1UUnitV0, C1UUnitV1
+]>;
+def C1UUnitV02 : ProcResGroup<[
+ C1UUnitV0, C1UUnitV2
+]>;
+def C1UUnitV13 : ProcResGroup<[
+ C1UUnitV1, C1UUnitV3
+]>;
+def C1UUnitV0123 : ProcResGroup<[
+ C1UUnitV0, C1UUnitV1, C1UUnitV2, C1UUnitV3
+]>;
+def C1UUnitV0134 : ProcResGroup<[
+ C1UUnitV0, C1UUnitV1, C1UUnitV3, C1UUnitV4
+]>;
+
+// Define commonly used read types.
+// All generic read classes get an advance of 0 cycles here; forwarding is
+// modelled separately through the C1URd_* SchedReadAdvance records.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+// Generic write classes that consume no processor resource (empty resource
+// list). WriteAtomic is marked unsupported; the others only carry a latency.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to C1 Ultra.
+// TODO: Flesh out with C1 Ultra latencies and port usage.
+//===----------------------------------------------------------------------===//
+
+// Define generic 0 micro-op types.
+def C1UWrite_0c : SchedWriteRes<[]> { let Latency = 0; }
+
+// Define a small set of generic 1 micro-op types as placeholders.
+// Naming convention: C1UWrite_<latency>c_<count><unit>, where <unit> is the
+// suffix of the C1UUnit* resource or group used. For example,
+// C1UWrite_2c_1B has Latency = 2 and uses C1UUnitB once. Some records in
+// this table omit the count (e.g. C1UWrite_2c_V01).
+def C1UWrite_1c_1B : SchedWriteRes<[C1UUnitB]> { let Latency = 1; }
+def C1UWrite_2c_1B : SchedWriteRes<[C1UUnitB]> { let Latency = 2; }
+def C1UWrite_1c_1I : SchedWriteRes<[C1UUnitI]> { let Latency = 1; }
+def C1UWrite_2c_1I : SchedWriteRes<[C1UUnitI]> { let Latency = 2; }
+def C1UWrite_2c_1I4 : SchedWriteRes<[C1UUnitI4]> { let Latency = 2; }
+def C1UWrite_1c_1M : SchedWriteRes<[C1UUnitM]> { let Latency = 1; }
+def C1UWrite_2c_1M : SchedWriteRes<[C1UUnitM]> { let Latency = 2; }
+def C1UWrite_2c_1M0 : SchedWriteRes<[C1UUnitM0]> { let Latency = 2; }
+def C1UWrite_3c_1M0 : SchedWriteRes<[C1UUnitM0]> { let Latency = 3; }
+def C1UWrite_4c_1M0 : SchedWriteRes<[C1UUnitM0]> { let Latency = 4; }
+// The 12- and 20-cycle M0 types set ReleaseAtCycles equal to Latency, so
+// C1UUnitM0 stays reserved for the whole operation.
+def C1UWrite_12c_1M0 : SchedWriteRes<[C1UUnitM0]> { let Latency = 12;
+ let ReleaseAtCycles = [12]; }
+def C1UWrite_20c_1M0 : SchedWriteRes<[C1UUnitM0]> { let Latency = 20;
+ let ReleaseAtCycles = [20]; }
+def C1UWrite_1c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 1; }
+def C1UWrite_2c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 2; }
+def C1UWrite_3c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1UWrite_4c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1UWrite_6c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 6; }
+def C1UWrite_8c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 8; }
+def C1UWrite_1c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 1; }
+def C1UWrite_2c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 2; }
+def C1UWrite_4c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 4; }
+def C1UWrite_6c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 6; }
+def C1UWrite_10c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 10;}
+def C1UWrite_12c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 12; }
+def C1UWrite_20c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 20; }
+def C1UWrite_3c_1V1 : SchedWriteRes<[C1UUnitV1]> { let Latency = 3; }
+def C1UWrite_5c_1V1 : SchedWriteRes<[C1UUnitV1]> { let Latency = 5; }
+def C1UWrite_8c_1V1 : SchedWriteRes<[C1UUnitV1]> { let Latency = 8; }
+def C1UWrite_12c_1V1 : SchedWriteRes<[C1UUnitV1]> { let Latency = 12; }
+def C1UWrite_2c_V01 : SchedWriteRes<[C1UUnitV01]> { let Latency = 2; }
+def C1UWrite_3c_V01 : SchedWriteRes<[C1UUnitV01]> { let Latency = 3; }
+def C1UWrite_2c_V0134 : SchedWriteRes<[C1UUnitV0134 ]> { let Latency = 2; }
+def C1UWrite_3c_V0134 : SchedWriteRes<[C1UUnitV0134]> { let Latency = 3; }
+// The 2V0134 / 4V0134 records list the group more than once but do not set
+// NumMicroOps.
+def C1UWrite_3c_2V0134 : SchedWriteRes<[C1UUnitV0134, C1UUnitV0134]> { let Latency = 3; }
+def C1UWrite_4c_V0134 : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1UWrite_4c_2V0134 : SchedWriteRes<[C1UUnitV0134, C1UUnitV0134]> { let Latency = 4; }
+def C1UWrite_6c_V0134 : SchedWriteRes<[C1UUnitV0134]> { let Latency = 6; }
+def C1UWrite_6c_4V0134 : SchedWriteRes<[C1UUnitV0134, C1UUnitV0134, C1UUnitV0134, C1UUnitV0134]> { let Latency = 6;}
+def C1UWrite_1c_1L : SchedWriteRes<[C1UUnitL]> { let Latency = 1; }
+def C1UWrite_4c_1L : SchedWriteRes<[C1UUnitL]> { let Latency = 4; }
+def C1UWrite_6c_1L : SchedWriteRes<[C1UUnitL]> { let Latency = 6; }
+def C1UWrite_1c_1SA : SchedWriteRes<[C1UUnitSA]> { let Latency = 1; }
+
+// Branch resource plus a single-cycle integer port.
+def C1UWrite_2c_1B_1S : SchedWriteRes<[C1UUnitB, C1UUnitS]> {
+ let NumMicroOps = 2;
+ let Latency = 2;
+}
+
+// Integer port plus the flags resource, in 1- and 2-cycle forms.
+def C1UWrite_1c_1I_1Flg : SchedWriteRes<[C1UUnitI, C1UUnitFlg]> {
+ let Latency = 1;
+}
+def C1UWrite_2c_1I_1Flg : SchedWriteRes<[C1UUnitI, C1UUnitFlg]> {
+ let Latency = 2;
+}
+
+// C1UUnitSA (LS0/LS1) plus the store-data resource.
+def C1UWrite_1c_1SA_1D : SchedWriteRes<[C1UUnitSA, C1UUnitD]> {
+ let NumMicroOps = 2;
+ let Latency = 1;
+}
+
+// C1UUnitSA plus the store-data resource plus an integer port.
+def C1UWrite_1c_1SA_1D_1I : SchedWriteRes<[C1UUnitSA, C1UUnitD, C1UUnitI]> {
+ let NumMicroOps = 3;
+ let Latency = 1;
+}
+
+// 2-cycle types pairing C1UUnitSA with C1UUnitV01, from one to four of each.
+// None of these records sets NumMicroOps.
+def C1UWrite_2c_1SA_V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01]> {
+ let Latency = 2;
+}
+def C1UWrite_2c_2SA_2V01 : SchedWriteRes<[
+ C1UUnitSA, C1UUnitSA,
+ C1UUnitV01, C1UUnitV01
+]> {
+ let Latency = 2;
+}
+def C1UWrite_2c_3SA_3V01 : SchedWriteRes<[
+ C1UUnitSA, C1UUnitSA, C1UUnitSA,
+ C1UUnitV01, C1UUnitV01, C1UUnitV01
+]> {
+ let Latency = 2;
+}
+def C1UWrite_2c_4SA_4V01 : SchedWriteRes<[
+ C1UUnitSA, C1UUnitSA, C1UUnitSA, C1UUnitSA,
+ C1UUnitV01, C1UUnitV01, C1UUnitV01, C1UUnitV01
+]> {
+ let Latency = 2;
+}
+// As C1UWrite_2c_1SA_V01 with an additional integer port.
+def C1UWrite_2c_1SA_V01_1I : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitI]> {
+ let Latency = 2;
+}
+
+// Multi-resource types combining integer, load, M0 and vector resources.
+// NOTE(review): the multi-line records in this run set NumMicroOps to the
+// number of listed resources, while the one-line records (C1UWrite_4c_2L,
+// C1UWrite_4c_2L_1I, C1UWrite_5c_2L_1I, C1UWrite_6c_2L_1I,
+// C1UWrite_4c_V0134_1V, C1UWrite_4c_2V0134_1V) do not set it at all.
+// Confirm whether that difference is intentional.
+def C1UWrite_2c_1I_1M : SchedWriteRes<[C1UUnitI, C1UUnitM]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_2c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_3c_1I_1M : SchedWriteRes<[C1UUnitI, C1UUnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_4c_2L : SchedWriteRes<[C1UUnitL, C1UUnitL]> { let Latency = 4; }
+def C1UWrite_4c_2L_1I : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitI]> { let Latency = 4; }
+
+def C1UWrite_5c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_5c_2L_1I : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitI]> { let Latency = 5; }
+
+def C1UWrite_6c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_6c_2L_1I : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitI]> { let Latency = 6; }
+
+def C1UWrite_7c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_5c_1M0_1V : SchedWriteRes<[C1UUnitM0, C1UUnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_4c_V0134_1V : SchedWriteRes<[C1UUnitV0134, C1UUnitV]> { let Latency = 4; }
+def C1UWrite_4c_2V0134_1V : SchedWriteRes<[C1UUnitV0134, C1UUnitV0134, C1UUnitV]> { let Latency = 4; }
+
+// V1 types with an explicit reservation time. The "_<N>rc" suffix matches
+// the ReleaseAtCycles value: C1UUnitV1 stays reserved for N cycles while
+// Latency gives the cycle count in the "<L>c" part of the name.
+def C1UWrite_8c_V1_4rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 8;
+ let ReleaseAtCycles = [4];
+}
+
+def C1UWrite_9c_V1_2rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 9;
+ let ReleaseAtCycles = [2];
+}
+
+def C1UWrite_12c_V1_8rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 12;
+ let ReleaseAtCycles = [8];
+}
+
+def C1UWrite_11c_V1_4rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 11;
+ let ReleaseAtCycles = [4];
+}
+
+def C1UWrite_13c_V1_2rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 13;
+ let ReleaseAtCycles = [2];
+}
+
+// M0 plus the branch resource, two micro-ops.
+def C1UWrite_6c_1M0_1B : SchedWriteRes<[C1UUnitM0, C1UUnitB]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+// M0 plus the branch resource plus an integer port: one micro-op per listed
+// resource, matching the other three-resource records in this file
+// (e.g. C1UWrite_1c_1SA_1D_1I, C1UWrite_7c_1M_1M0_1V).
+def C1UWrite_6c_1M0_1B_1I : SchedWriteRes<[C1UUnitM0, C1UUnitB, C1UUnitI]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// Load-based multi-micro-op types. Every record in this run sets NumMicroOps
+// to the number of resources it lists.
+def C1UWrite_9c_1M0_1L : SchedWriteRes<[C1UUnitM0, C1UUnitL]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_6c_2L : SchedWriteRes<[C1UUnitL, C1UUnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+
+def C1UWrite_6c_3L : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_7c_4L : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL, C1UUnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+
+def C1UWrite_8c_1L_1V : SchedWriteRes<[C1UUnitL, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_6c_1L_1V : SchedWriteRes<[C1UUnitL, C1UUnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_8c_1L_2V : SchedWriteRes<[C1UUnitL, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_8c_2L_2V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def C1UWrite_8c_2L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitV, C1UUnitV,
+ C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+
+// NOTE(review): this record differs from C1UWrite_8c_3L_3V only by the
+// upper-case 'C' in its name; resources, latency and micro-op count are
+// identical. Confirm whether both spellings are needed or whether this one
+// is a typo that should be folded into C1UWrite_8c_3L_3V.
+def C1UWrite_8C_3L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitV, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+}
+
+// Larger load + vector combinations. NumMicroOps equals the total number of
+// listed resources in each record.
+def C1UWrite_9c_3L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitV, C1UUnitV, C1UUnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+def C1UWrite_8c_3L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitV, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+}
+
+def C1UWrite_9c_6L_4V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitL, C1UUnitL, C1UUnitV, C1UUnitV,
+ C1UUnitV, C1UUnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 10;
+}
+
+def C1UWrite_8c_3L_4V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 7;
+}
+
+
+def C1UWrite_8c_4L_4V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+// Types that use C1UUnitSA, C1UUnitV01 and C1UUnitV in equal numbers, from
+// one of each up to twelve of each.
+// NOTE(review): none of the records in this run sets NumMicroOps, unlike the
+// load-based multi-resource records elsewhere in this file. Confirm that this
+// is intended.
+def C1UWrite_4c_1SA_V01_V : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV]> { let Latency = 4; }
+def C1UWrite_4c_2SA_2V01_2V : SchedWriteRes<[C1UUnitSA, C1UUnitSA, C1UUnitV01, C1UUnitV01,
+ C1UUnitV, C1UUnitV]> { let Latency = 4; }
+def C1UWrite_5c_1SA_V01_V : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV]> { let Latency = 5; }
+def C1UWrite_5c_4SA_4V01_4V : SchedWriteRes<[C1UUnitSA, C1UUnitSA, C1UUnitSA, C1UUnitSA,
+ C1UUnitV01, C1UUnitV01, C1UUnitV01, C1UUnitV01,
+ C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV]> { let Latency = 5; }
+
+def C1UWrite_6c_1SA_V01_V: SchedWriteRes<[C1UUnitSA,C1UUnitV01, C1UUnitV]> { let Latency = 6; }
+
+
+def C1UWrite_6c_6SA_6V01_6V: SchedWriteRes<[C1UUnitSA, C1UUnitSA, C1UUnitSA,
+ C1UUnitSA, C1UUnitSA, C1UUnitSA,
+ C1UUnitV01, C1UUnitV01, C1UUnitV01,
+ C1UUnitV01, C1UUnitV01, C1UUnitV01,
+ C1UUnitV, C1UUnitV, C1UUnitV,
+ C1UUnitV, C1UUnitV, C1UUnitV ]> { let Latency = 6; }
+
+def C1UWrite_6c_8SA_8V01_8V: SchedWriteRes<[C1UUnitSA, C1UUnitSA, C1UUnitSA, C1UUnitSA,
+ C1UUnitSA, C1UUnitSA, C1UUnitSA, C1UUnitSA,
+ C1UUnitV01, C1UUnitV01, C1UUnitV01, C1UUnitV01,
+ C1UUnitV01, C1UUnitV01, C1UUnitV01, C1UUnitV01,
+ C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV,
+ C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV ]> { let Latency = 6; }
+
+
+def C1UWrite_7c_1SA_V01_V: SchedWriteRes<[
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+]> {
+ let Latency = 7;
+}
+
+// Twelve (SA, V01, V) triples, written one triple per line.
+def C1UWrite_7c_12SA_12V01_12V: SchedWriteRes<[
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+ C1UUnitSA, C1UUnitV01, C1UUnitV,
+ C1UUnitSA, C1UUnitV01, C1UUnitV
+]> {
+ let Latency = 7;
+}
+
+// Mixed M / M0 / V-subset types.
+// NOTE(review): C1UWrite_8c_1V_3V0134 lists four resources but, unlike its
+// neighbours, does not set NumMicroOps. Confirm whether that is intentional.
+def C1UWrite_7c_1M_1M0_1V : SchedWriteRes<[C1UUnitM, C1UUnitM0, C1UUnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_2V1 : SchedWriteRes<[C1UUnitV1, C1UUnitV1]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_8c_1M0_1V1_1V01 : SchedWriteRes<[C1UUnitM0, C1UUnitV1, C1UUnitV01]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_1V1_1V01 : SchedWriteRes<[C1UUnitV1, C1UUnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_7c_1M0_V0134 : SchedWriteRes<[C1UUnitM0, C1UUnitV0134]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_5c_1V_1M0 : SchedWriteRes<[C1UUnitV, C1UUnitM0]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_8c_1V_3V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV0134, C1UUnitV0134, C1UUnitV0134]> { let Latency = 8; }
+
+def C1UWrite_6c_1V_2V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV0134, C1UUnitV0134]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_2V_1V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV, C1UUnitV0134]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_4c_1V_1V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV0134]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// V0 types with an explicit reservation time: "<L>c" is the Latency and
+// "<N>rc" the ReleaseAtCycles value for C1UUnitV0.
+def C1UWrite_12c_1V0_8rc : SchedWriteRes<[C1UUnitV0]> {
+ let ReleaseAtCycles = [8];
+ let Latency = 12;
+}
+
+def C1UWrite_11c_1V0_4rc : SchedWriteRes<[C1UUnitV0]> {
+ let ReleaseAtCycles = [4];
+ let Latency = 11;
+}
+
+// 13-cycle operation on C1UUnitV0 that keeps the unit reserved for 2 cycles.
+// Latency matches the "13c" in the record name (and the V1 counterpart
+// C1UWrite_13c_V1_2rc).
+def C1UWrite_13c_1V0_2rc : SchedWriteRes<[C1UUnitV0]> {
+ let Latency = 13;
+ let ReleaseAtCycles = [2];
+}
+
+// 12-cycle operation on C1UUnitV1 (as the "1V1" in the name states) that
+// keeps the unit reserved for 8 cycles.
+def C1UWrite_12c_1V1_8rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 12;
+ let ReleaseAtCycles = [8];
+}
+
+// 11-cycle operation on C1UUnitV1 (as the "1V1" in the name states) that
+// keeps the unit reserved for 4 cycles.
+def C1UWrite_11c_1V1_4rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 11;
+ let ReleaseAtCycles = [4];
+}
+
+// 13-cycle operation on C1UUnitV1 that keeps the unit reserved for 2 cycles.
+// Both the unit and the latency now agree with the record name.
+def C1UWrite_13c_1V1_2rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 13;
+ let ReleaseAtCycles = [2];
+}
+
+// Load combined with an M, V01 or integer+vector resource; one micro-op per
+// listed resource.
+def C1UWrite_6c_1L_1M : SchedWriteRes<[C1UUnitL, C1UUnitM]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_9c_1V01_1L : SchedWriteRes<[C1UUnitV01, C1UUnitL]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_9c_1I_1L_1V : SchedWriteRes<[C1UUnitI, C1UUnitL, C1UUnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+
+// Load plus vector, two micro-ops. Latency matches the "10c" in the record
+// name.
+def C1UWrite_10c_1L_1V : SchedWriteRes<[C1UUnitL, C1UUnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+// Remaining multi-micro-op types. Each record sets NumMicroOps to the number
+// of resources it lists. Several of them are only distinguished by latency
+// and are selected between by the SchedWriteVariant records defined later.
+def C1UWrite_11c_1I_1L_1V : SchedWriteRes<[C1UUnitI, C1UUnitL, C1UUnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_2c_1SA_1V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_4c_1I_1V_1SA_1V : SchedWriteRes<[C1UUnitI, C1UUnitV, C1UUnitSA, C1UUnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+
+def C1UWrite_5c_1I_1V_1SA_1V : SchedWriteRes<[C1UUnitI, C1UUnitV, C1UUnitSA, C1UUnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+def C1UWrite_7c_1I_1V_1SA_1V : SchedWriteRes<[C1UUnitI, C1UUnitV, C1UUnitSA, C1UUnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+
+def C1UWrite_2c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_4c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_5c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_7c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_3c_1M0_1M : SchedWriteRes<[C1UUnitM0, C1UUnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_4c_1M0_1M : SchedWriteRes<[C1UUnitM0, C1UUnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// Forwarded types
+// Each C1UWr_X / C1URd_X pair describes a write and the matching read
+// advance: SchedReadAdvance<N, [writes]> advances a read by N cycles when the
+// value was produced by one of the listed writes. For example, C1UWr_FMA has
+// Latency = 4 and C1URd_FMA advances by 2 for values written by WriteFMul or
+// C1UWr_FMA.
+def C1UWr_FMA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_FMA : SchedReadAdvance<2, [WriteFMul, C1UWr_FMA]>;
+
+def C1UWr_VA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VA : SchedReadAdvance<3, [C1UWr_VA]>;
+
+def C1UWr_VDOT : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1URd_VDOT : SchedReadAdvance<2, [C1UWr_VDOT]>;
+
+def C1UWr_VMMA : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1URd_VMMA : SchedReadAdvance<2, [C1UWr_VMMA]>;
+
+def C1UWr_MA : SchedWriteRes<[C1UUnitI4]> { let Latency = 3; }
+def C1URd_MA : SchedReadAdvance<2, [C1UWr_MA]>;
+
+def C1UWr_VMA : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_VMA : SchedReadAdvance<3, [C1UWr_VMA]>;
+
+def C1UWr_VMAH : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_VMAH : SchedReadAdvance<2, [C1UWr_VMAH]>;
+
+def C1UWr_VPA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VPA : SchedReadAdvance<3, [C1UWr_VPA]>;
+
+def C1UWr_VSA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VSA : SchedReadAdvance<3, [C1UWr_VSA]>;
+
+def C1UWr_VFCMA : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_VFCMA : SchedReadAdvance<3, [C1UWr_VFCMA]>;
+
+def C1UWr_VFMA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VFMA : SchedReadAdvance<2, [C1UWr_VFMA]>;
+
+def C1UWr_VBFDOT : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VBFDOT : SchedReadAdvance<2, [C1UWr_VBFDOT]>;
+
+// NOTE(review): C1URd_VBFMMA advances by 1 whereas C1URd_ZBFMMA advances by 2
+// for a write with the same 5-cycle latency. Confirm against the
+// optimization guide.
+def C1UWr_VBFMMA : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_VBFMMA : SchedReadAdvance<1, [C1UWr_VBFMMA]>;
+
+def C1UWr_VBFMAL : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_VBFMAL : SchedReadAdvance<3, [C1UWr_VBFMAL]>;
+
+def C1UWr_ZA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZA : SchedReadAdvance<3, [C1UWr_ZA]>;
+def C1UWr_ZPA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZPA : SchedReadAdvance<3, [C1UWr_ZPA]>;
+def C1UWr_ZSA : SchedWriteRes<[C1UUnitV13]> { let Latency = 4; }
+def C1URd_ZSA : SchedReadAdvance<3, [C1UWr_ZSA]>;
+
+def C1UWr_ZDOTB : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1URd_ZDOTB : SchedReadAdvance<2, [C1UWr_ZDOTB]>;
+def C1UWr_ZDOTH : SchedWriteRes<[C1UUnitV0134]> { let Latency = 3; }
+def C1URd_ZDOTH : SchedReadAdvance<2, [C1UWr_ZDOTH]>;
+
+def C1UWr_ZCMABHS : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_ZCMABHS : SchedReadAdvance<3, [C1UWr_ZCMABHS]>;
+
+def C1UWr_ZMMA : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1URd_ZMMA : SchedReadAdvance<2, [C1UWr_ZMMA]>;
+
+def C1UWr_ZMA : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_ZMA : SchedReadAdvance<3, [C1UWr_ZMA]>;
+
+def C1UWr_ZMASQL : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_ZMASQL : SchedReadAdvance<2, [C1UWr_ZMASQL]>;
+
+def C1UWr_ZFCMA : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZFCMA : SchedReadAdvance<3, [C1UWr_ZFCMA]>;
+
+def C1UWr_ZFMA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZFMA : SchedReadAdvance<2, [C1UWr_ZFMA]>;
+
+def C1UWr_ZFMAL : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZFMAL : SchedReadAdvance<3, [C1UWr_ZFMAL]>;
+
+
+def C1UWr_ZBFDOT : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZBFDOT : SchedReadAdvance<2, [C1UWr_ZBFDOT]>;
+def C1UWr_ZBFMMA : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZBFMMA : SchedReadAdvance<2, [C1UWr_ZBFMMA]>;
+def C1UWr_ZBFMAL : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZBFMAL : SchedReadAdvance<3, [C1UWr_ZBFMAL]>;
+
+def C1UWr_Pred : SchedWriteRes<[C1UUnitM]> { let Latency = 2; }
+def C1URd_Pred : SchedReadAdvance<1, [C1UWr_Pred]>;
+
+// Predicate controlled types
+// Each variant lists its predicated case first; NoSchedPred is the fallback.
+
+// 1 cycle when IsCheapLSL holds, otherwise 2 cycles.
+def C1UWrite_ArithI : SchedWriteVariant<[
+ SchedVar<IsCheapLSL, [C1UWrite_1c_1I]>,
+ SchedVar<NoSchedPred, [C1UWrite_2c_1I]>
+]>;
+
+// 1 cycle on an integer port when IsRORImmIdiomPred holds, otherwise the
+// 3-cycle I + M type.
+def C1UWrite_Extr : SchedWriteVariant<[
+ SchedVar<IsRORImmIdiomPred, [C1UWrite_1c_1I]>,
+ SchedVar<NoSchedPred, [C1UWrite_3c_1I_1M]>
+]>;
+
+// 7-cycle L + I type when FPIsQForm holds, otherwise the 6-cycle L type.
+def C1UWrite_LdrQ : SchedWriteVariant<[
+ SchedVar<FPIsQForm, [C1UWrite_7c_1L_1I]>,
+ SchedVar<NoSchedPred, [C1UWrite_6c_1L]>
+]>;
+
+// Adds an integer port to the SA + V01 type when FPIsQForm holds.
+def C1UWrite_StrQ : SchedWriteVariant<[
+ SchedVar<FPIsQForm, [C1UWrite_2c_1SA_V01_1I]>,
+ SchedVar<NoSchedPred, [C1UWrite_2c_1SA_V01]>
+]>;
+
+// 1 cycle when IsXOrWDest holds, otherwise 2 cycles.
+def C1UWrite_1or2c_1I_1Flg : SchedWriteVariant<[
+ SchedVar<IsXOrWDest, [C1UWrite_1c_1I_1Flg]>,
+ SchedVar<NoSchedPred, [C1UWrite_2c_1I_1Flg]>
+]>;
+
+// NOTE(review): the name says "2 or 3 cycles", but the variant selects the
+// 1-cycle type when NeoversePdIsPg holds and the 2-cycle type otherwise.
+// The sibling C1UWrite_3or4c_1M0_1M picks its longer latency for
+// NeoversePdIsPg. Confirm the intended latencies (presumably 3c / 2c) against
+// the optimization guide.
+def C1UWrite_2or3c_1V0 : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [C1UWrite_1c_1V0]>,
+ SchedVar<NoSchedPred, [C1UWrite_2c_1V0]>]>;
+
+// SA + 2xV01 variants: the longer latency is chosen when SameZRegDstSrcPred
+// holds, the shorter one otherwise.
+def C1UWrite_2or4c_1SA_2V01 : SchedWriteVariant<[
+ SchedVar<SameZRegDstSrcPred, [C1UWrite_4c_1SA_2V01]>,
+ SchedVar<NoSchedPred, [C1UWrite_2c_1SA_2V01]>
+]>;
+
+def C1UWrite_4or6c_1SA_2V01 : SchedWriteVariant<[
+ SchedVar<SameZRegDstSrcPred, [C1UWrite_6c_1SA_2V01]>,
+ SchedVar<NoSchedPred, [C1UWrite_4c_1SA_2V01]>
+]>;
+
+def C1UWrite_5or7c_1SA_2V01 : SchedWriteVariant<[
+ SchedVar<SameZRegDstSrcPred, [C1UWrite_7c_1SA_2V01]>,
+ SchedVar<NoSchedPred, [C1UWrite_5c_1SA_2V01]>
+]>;
+
+// M0 + M variant: 4 cycles when NeoversePdIsPg holds, otherwise 3 cycles.
+def C1UWrite_3or4c_1M0_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [C1UWrite_4c_1M0_1M]>,
+ SchedVar<NoSchedPred, [C1UWrite_3c_1M0_1M]>
+]>;
+
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+// Schedule COPY with the generic WriteI class.
+def : InstRW<[WriteI], (instrs COPY)>;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction scheduling classes.
+//===----------------------------------------------------------------------===//
+
+// Branch instructions
+// -----------------------------------------------------------------------------
+
+// Branch, immed
+// Compare and branch
+def : SchedAlias<WriteBr, C1UWrite_2c_1B>;
+
+// Branch, register
+def : SchedAlias<WriteBrReg, C1UWrite_2c_1B>;
+
+// Branch and link, immed
+// Branch and link, register
+// BL and BLR get an explicit InstRW using the two-micro-op B + S type.
+def : InstRW<[C1UWrite_2c_1B_1S], (instrs BL, BLR)>;
+
+
+// Arithmetic and logical operations
+// -----------------------------------------------------------------------------
+
+// ALU, basic
+def : SchedAlias<WriteI, C1UWrite_1c_1I>;
+
+// ALU, basic, flagset
+// Flag-setting forms additionally use the C1UUnitFlg resource.
+def : InstRW<[C1UWrite_1c_1I_1Flg],
+ (instregex "^(ADD|SUB)S[WX]r[ir]$",
+ "^(ADC|SBC)S[WX]r$",
+ "^ANDS[WX]ri$",
+ "^(AND|BIC)S[WX]rr$")>;
+
+// ALU, extend and shift
+// Resolved per instruction by the C1UWrite_1or2c_1I_1Flg variant.
+def : SchedAlias<WriteIEReg, C1UWrite_1or2c_1I_1Flg>;
+
+// Arithmetic, LSL shift, shift <= 4
+// Arithmetic, flagset, LSL shift, shift <= 4
+// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
+// Resolved per instruction by the C1UWrite_ArithI variant.
+def : SchedAlias<WriteISReg, C1UWrite_ArithI>;
+
+
+// Arithmetic, immediate to logical address tag
+def : InstRW<[C1UWrite_2c_1I], (instrs ADDG, SUBG)>;
+
+// Conditional compare
+def : InstRW<[C1UWrite_1c_1I_1Flg], (instregex "^CCM[NP][WX][ir]")>;
+
+// Conditional select
+def : InstRW<[C1UWrite_1c_1I_1Flg],
+ (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)[ir]")>;
+
+// Convert floating-point condition flags
+// Flag manipulation instructions
+// WriteSys is given a latency of 1 and no processor resource.
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+// Insert Random Tags
+def : InstRW<[C1UWrite_2c_1M], (instrs IRG, IRGstack)>;
+
+// Insert Tag Mask
+// Subtract Pointer
+def : InstRW<[C1UWrite_1c_1I], (instrs GMI, SUBP)>;
+
+// Subtract Pointer, flagset
+def : InstRW<[C1UWrite_1c_1I_1Flg], (instrs SUBPS)>;
+
+// Logical, shift, no flagset
+def : InstRW<[C1UWrite_1c_1I],
+ (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs",
+ "^ORR[WX]rs$")>;
+
+// Logical, shift, flagset
+def : InstRW<[C1UWrite_1c_1I_1Flg], (instregex "^(AND|BIC)S[WX]rs$")>;
+
+// Move and shift instructions
+// -----------------------------------------------------------------------------
+
+def : SchedAlias<WriteImm, C1UWrite_1c_1I>;
+
+// Divide and multiply instructions
+// -----------------------------------------------------------------------------
+
+// SDIV, UDIV
+// Both divide types reserve C1UUnitM0 for their full latency (12 cycles for
+// WriteID32, 20 cycles for WriteID64).
+def : SchedAlias<WriteID32, C1UWrite_12c_1M0>;
+def : SchedAlias<WriteID64, C1UWrite_20c_1M0>;
+
+// WriteIM32 and WriteIM64 both map to the 2-cycle M type.
+def : SchedAlias<WriteIM32, C1UWrite_2c_1M>;
+def : SchedAlias<WriteIM64, C1UWrite_2c_1M>;
+
+// Multiply accumulate, W-form
+// Multiply accumulate, X-form
+// Reads in an InstRW list bind to source operands in order. The two ReadIM
+// entries cover the multiplicand operands so that C1URd_MA (the accumulator
+// forwarding advance) lands on the third source operand, the addend.
+def : InstRW<[C1UWr_MA, ReadIM, ReadIM, C1URd_MA],
+ (instregex "^M(ADD|SUB)[WX]rrr$")>;
+
+// Multiply accumulate long
+// Reads in an InstRW list bind to source operands in order. The two ReadIM
+// entries cover the multiplicand operands so that C1URd_MA (the accumulator
+// forwarding advance) lands on the third source operand, the addend.
+def : InstRW<[C1UWr_MA, ReadIM, ReadIM, C1URd_MA],
+ (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
----------------
nasherm wrote:
Done
https://github.com/llvm/llvm-project/pull/182251
More information about the llvm-commits
mailing list