[llvm] [AArch64] C1-Ultra Scheduling model (PR #182251)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 09:14:36 PST 2026
================
@@ -0,0 +1,2370 @@
+//=- AArch64SchedC1Ultra.td - C1 Ultra Scheduling Defs --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the C1 Ultra processors.
+// Information is taken from the C1 Ultra Software Optimization Guide
+//
+// https://developer.arm.com/documentation/111079/3-0
+//
+//===----------------------------------------------------------------------===//
+
+// Machine model record for C1 Ultra. Several of the parameters below are
+// placeholders borrowed from other models, as noted on each field.
+def C1UltraModel : SchedMachineModel {
+ let IssueWidth = 10;
+
+ // Entries in micro-op re-order buffer. NOTE: Copied from Neoverse-V3
+ let MicroOpBufferSize = 320;
+ // NOTE: Copied from Cortex-A57.
+ let LoopMicroOpBufferSize = 16;
+
+ // Optimistic load latency. NOTE: Copied from Neoverse-V3
+ let LoadLatency = 4;
+ // Extra cycles for mispredicted branch. NOTE: Copied from N3.
+ let MispredictPenalty = 10;
+
+ let CompleteModel = 1;
+
+ // Features whose instructions this model does not describe.
+ list<Predicate> UnsupportedFeatures =
+ !listconcat(SMEUnsupported.F,
+ [HasSVE2p1, HasSVEB16B16, HasCPA, HasCSSC]);
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on C1 Ultra.
+// Instructions are first fetched and then decoded into internal macro-ops
+// (MOPs). From there, the MOPs proceed through register renaming and dispatch
+// stages. A MOP can be split into two micro-ops further down the pipeline
+// after the decode stage.
+
+let SchedModel = C1UltraModel in {
+
+// Define the issue ports.
+def C1UUnitB : ProcResource<3>; // Branch 0/1/2
+def C1UUnitS0 : ProcResource<1>; // Integer single-cycle 0
+def C1UUnitS1 : ProcResource<1>; // Integer single-cycle 1
+def C1UUnitS2 : ProcResource<1>; // Integer single-cycle 2
+def C1UUnitS3 : ProcResource<1>; // Integer single-cycle 3
+def C1UUnitS4 : ProcResource<1>; // Integer single-cycle 4
+def C1UUnitS5 : ProcResource<1>; // Integer single-cycle 5
+def C1UUnitM0 : ProcResource<1>; // Integer single/multicycle 0
+def C1UUnitM1 : ProcResource<1>; // Integer single/multicycle 1
+def C1UUnitV0 : ProcResource<1>; // FP/ASIMD 0
+def C1UUnitV1 : ProcResource<1>; // FP/ASIMD 1
+def C1UUnitV2 : ProcResource<1>; // FP/ASIMD 2
+def C1UUnitV3 : ProcResource<1>; // FP/ASIMD 3
+def C1UUnitV4 : ProcResource<1>; // FP/ASIMD 4
+def C1UUnitV5 : ProcResource<1>; // FP/ASIMD 5
+def C1UUnitLS0 : ProcResource<1>; // Load/Store 0
+def C1UUnitLS1 : ProcResource<1>; // Load/Store 1
+def C1UUnitL23 : ProcResource<2>; // Load 2/3
+def C1UUnitD : ProcResource<2>; // Store data 0/1
+def C1UUnitCME : ProcResource<1>; // CME operations block
+def C1UUnitFlg : ProcResource<4>; // Flags
+
+// Groups of issue ports; a micro-op bound to a group takes one member port.
+
+// The six single-cycle integer ports.
+def C1UUnitS : ProcResGroup<[C1UUnitS0, C1UUnitS1, C1UUnitS2,
+ C1UUnitS3, C1UUnitS4, C1UUnitS5]>;
+// Every integer port: single-cycle plus single/multicycle.
+def C1UUnitI : ProcResGroup<[C1UUnitS0, C1UUnitS1, C1UUnitS2,
+ C1UUnitS3, C1UUnitS4, C1UUnitS5,
+ C1UUnitM0, C1UUnitM1]>;
+// Subset of integer ports: S0, S2, S4 and M0.
+def C1UUnitI4 : ProcResGroup<[C1UUnitS0, C1UUnitS2, C1UUnitS4, C1UUnitM0]>;
+// Both single/multicycle integer ports.
+def C1UUnitM : ProcResGroup<[C1UUnitM0, C1UUnitM1]>;
+
+// All load-capable ports.
+def C1UUnitL : ProcResGroup<[C1UUnitLS0, C1UUnitLS1, C1UUnitL23]>;
+// The two load/store ports.
+def C1UUnitSA : ProcResGroup<[C1UUnitLS0, C1UUnitLS1]>;
+
+// All FP/ASIMD ports, followed by the subsets used by specific writes.
+def C1UUnitV : ProcResGroup<[C1UUnitV0, C1UUnitV1, C1UUnitV2,
+ C1UUnitV3, C1UUnitV4, C1UUnitV5]>;
+def C1UUnitV01 : ProcResGroup<[C1UUnitV0, C1UUnitV1]>;
+def C1UUnitV02 : ProcResGroup<[C1UUnitV0, C1UUnitV2]>;
+def C1UUnitV13 : ProcResGroup<[C1UUnitV1, C1UUnitV3]>;
+def C1UUnitV0123 : ProcResGroup<[C1UUnitV0, C1UUnitV1, C1UUnitV2, C1UUnitV3]>;
+def C1UUnitV0134 : ProcResGroup<[C1UUnitV0, C1UUnitV1, C1UUnitV3, C1UUnitV4]>;
+
+// Define commonly used read types.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+// NOTE: Copied from NeoverseC1U
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to C1 Ultra.
+// TODO: Flesh out with C1 Ultra latencies and port usage.
+//===----------------------------------------------------------------------===//
+
+// Define generic 0 micro-op types.
+def C1UWrite_0c : SchedWriteRes<[]> { let Latency = 0; }
+
+// Define a small set of generic 1 micro-op types as placeholders.
+def C1UWrite_1c_1B : SchedWriteRes<[C1UUnitB]> { let Latency = 1; }
+def C1UWrite_1c_1I : SchedWriteRes<[C1UUnitI]> { let Latency = 1; }
+def C1UWrite_2c_1I : SchedWriteRes<[C1UUnitI]> { let Latency = 2; }
+def C1UWrite_2c_1I4 : SchedWriteRes<[C1UUnitI4]> { let Latency = 2; }
+def C1UWrite_1c_1M : SchedWriteRes<[C1UUnitM]> { let Latency = 1; }
+def C1UWrite_2c_1M : SchedWriteRes<[C1UUnitM]> { let Latency = 2; }
+def C1UWrite_2c_1M0 : SchedWriteRes<[C1UUnitM0]> { let Latency = 2; }
+def C1UWrite_3c_1M0 : SchedWriteRes<[C1UUnitM0]> { let Latency = 3; }
+def C1UWrite_4c_1M0 : SchedWriteRes<[C1UUnitM0]> { let Latency = 4; }
+def C1UWrite_12c_1M0 : SchedWriteRes<[C1UUnitM0]> { let Latency = 12;
+ let ReleaseAtCycles = [12]; }
+def C1UWrite_20c_1M0 : SchedWriteRes<[C1UUnitM0]> { let Latency = 20;
+ let ReleaseAtCycles = [20]; }
+def C1UWrite_1c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 1; }
+def C1UWrite_2c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 2; }
+def C1UWrite_3c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1UWrite_4c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1UWrite_6c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 6; }
+def C1UWrite_8c_1V : SchedWriteRes<[C1UUnitV]> { let Latency = 8; }
+def C1UWrite_1c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 1; }
+def C1UWrite_2c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 2; }
+def C1UWrite_4c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 4; }
+def C1UWrite_6c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 6; }
+def C1UWrite_10c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 10;}
+def C1UWrite_12c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 12; }
+def C1UWrite_20c_1V0 : SchedWriteRes<[C1UUnitV0]> { let Latency = 20; }
+def C1UWrite_3c_1V1 : SchedWriteRes<[C1UUnitV1]> { let Latency = 3; }
+def C1UWrite_5c_1V1 : SchedWriteRes<[C1UUnitV1]> { let Latency = 5; }
+def C1UWrite_8c_1V1 : SchedWriteRes<[C1UUnitV1]> { let Latency = 8; }
+def C1UWrite_12c_1V1 : SchedWriteRes<[C1UUnitV1]> { let Latency = 12; }
+def C1UWrite_2c_V01 : SchedWriteRes<[C1UUnitV01]> { let Latency = 2; }
+def C1UWrite_3c_V01 : SchedWriteRes<[C1UUnitV01]> { let Latency = 3; }
+// Single micro-op writes that issue to one of the V0/V1/V3/V4 ports.
+// These use the C1UUnitV0134 group so that exactly one port is consumed.
+// Listing C1UUnitV01, C1UUnitV3 and C1UUnitV4 as separate resources would
+// reserve all three for every instruction and over-constrain the V pipes.
+def C1UWrite_2c_V0134 : SchedWriteRes<[C1UUnitV0134]> { let Latency = 2; }
+def C1UWrite_3c_V0134 : SchedWriteRes<[C1UUnitV0134]> { let Latency = 3; }
+def C1UWrite_4c_V0134 : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1UWrite_6c_V0134 : SchedWriteRes<[C1UUnitV0134]> { let Latency = 6; }
+def C1UWrite_1c_1L : SchedWriteRes<[C1UUnitL]> { let Latency = 1; }
+def C1UWrite_4c_1L : SchedWriteRes<[C1UUnitL]> { let Latency = 4; }
+def C1UWrite_6c_1L : SchedWriteRes<[C1UUnitL]> { let Latency = 6; }
+def C1UWrite_1c_1SA : SchedWriteRes<[C1UUnitSA]> { let Latency = 1; }
+
+def C1UWrite_2c_1B_1S : SchedWriteRes<[C1UUnitB, C1UUnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_1c_1I_1Flg : SchedWriteRes<[C1UUnitI, C1UUnitFlg]> { let Latency = 1; }
+def C1UWrite_2c_1I_1Flg : SchedWriteRes<[C1UUnitI, C1UUnitFlg]> { let Latency = 2; }
+
+def C1UWrite_1c_1SA_1D : SchedWriteRes<[C1UUnitSA, C1UUnitD]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_1c_1SA_1D_1I : SchedWriteRes<[C1UUnitSA, C1UUnitD, C1UUnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 3;
+}
+
+// 2-cycle write using one load/store port (SA) and one of V0/V1.
+// NOTE(review): no NumMicroOps is set here, whereas C1UWrite_2c_1SA_1V01
+// lists the same resources with NumMicroOps = 2 - confirm which is intended.
+def C1UWrite_2c_1SA_V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01]> {
+ let Latency = 2;
+}
+
+// As above, plus one integer port.
+// NOTE(review): NumMicroOps is likewise not set - confirm.
+def C1UWrite_2c_1SA_V01_1I : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitI]> {
+ let Latency = 2;
+}
+
+def C1UWrite_2c_1I_1M : SchedWriteRes<[C1UUnitI, C1UUnitM]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_2c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_3c_1I_1M : SchedWriteRes<[C1UUnitI, C1UUnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_5c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_6c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_7c_1L_1I : SchedWriteRes<[C1UUnitL, C1UUnitI]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_5c_1M0_1V : SchedWriteRes<[C1UUnitM0, C1UUnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+// 4-cycle write using one of the V0/V1/V3/V4 ports plus any V port.
+// The C1UUnitV0134 group is used so only one of those four ports is taken,
+// rather than reserving C1UUnitV01, C1UUnitV3 and C1UUnitV4 all at once.
+// NOTE(review): NumMicroOps is left at its default, as in the original;
+// confirm whether this should be 2 like the other two-resource writes.
+def C1UWrite_4c_V0134_1V : SchedWriteRes<[C1UUnitV0134, C1UUnitV]> {
+ let Latency = 4;
+}
+
+def C1UWrite_8c_V1_4rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 8;
+ let ReleaseAtCycles = [4];
+}
+
+def C1UWrite_9c_V1_2rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 9;
+ let ReleaseAtCycles = [2];
+}
+
+def C1UWrite_12c_V1_8rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 12;
+ let ReleaseAtCycles = [8];
+}
+
+def C1UWrite_11c_V1_4rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 11;
+ let ReleaseAtCycles = [4];
+}
+
+def C1UWrite_13c_V1_2rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 13;
+ let ReleaseAtCycles = [2];
+}
+
+def C1UWrite_6c_1M0_1B : SchedWriteRes<[C1UUnitM0, C1UUnitB]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+// 6-cycle write using M0, a branch port and an integer port.
+// NOTE(review): three resources are listed but NumMicroOps is 2; other
+// multi-resource writes in this file (e.g. C1UWrite_1c_1SA_1D_1I) count one
+// micro-op per listed resource - confirm this is intended.
+def C1UWrite_6c_1M0_1B_1I : SchedWriteRes<[C1UUnitM0, C1UUnitB, C1UUnitI]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_9c_1M0_1L : SchedWriteRes<[C1UUnitM0, C1UUnitL]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+
+def C1UWr_IM : SchedWriteRes<[C1UUnitI4]> { let Latency = 3; }
+
+def C1UWrite_6c_2L : SchedWriteRes<[C1UUnitL, C1UUnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+
+def C1UWrite_6c_3L : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_7c_4L : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL, C1UUnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+
+def C1UWrite_8c_1L_1V : SchedWriteRes<[C1UUnitL, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_6c_1L_1V : SchedWriteRes<[C1UUnitL, C1UUnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_8c_1L_2V : SchedWriteRes<[C1UUnitL, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_8c_2L_2V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def C1UWrite_8c_2L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitV, C1UUnitV,
+ C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+
+// 8-cycle, 6 micro-op write using three load ports and three V ports.
+// NOTE(review): this record has the same resources, latency and micro-op
+// count as C1UWrite_8c_3L_3V and differs only by the capital 'C' in its name;
+// it looks like an accidental duplicate - confirm and consolidate the users.
+def C1UWrite_8C_3L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitV, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+}
+
+def C1UWrite_9c_3L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitV, C1UUnitV, C1UUnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+def C1UWrite_8c_3L_3V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitV, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+}
+
+def C1UWrite_9c_6L_4V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitL, C1UUnitL, C1UUnitV, C1UUnitV,
+ C1UUnitV, C1UUnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 10;
+}
+
+def C1UWrite_8c_3L_4V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 7;
+}
+
+
+def C1UWrite_8c_4L_4V : SchedWriteRes<[C1UUnitL, C1UUnitL, C1UUnitL, C1UUnitL,
+ C1UUnitV, C1UUnitV, C1UUnitV, C1UUnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+def C1UWrite_4c_1SA_V01_V : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_5c_1SA_V01_V : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_1SA_V01_V: SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+
+def C1UWrite_7c_1SA_V01_V: SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_7c_1M_1M0_1V : SchedWriteRes<[C1UUnitM, C1UUnitM0, C1UUnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_2V1 : SchedWriteRes<[C1UUnitV1, C1UUnitV1]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_8c_1M0_1V1_1V01 : SchedWriteRes<[C1UUnitM0, C1UUnitV1, C1UUnitV01]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_1V1_1V01 : SchedWriteRes<[C1UUnitV1, C1UUnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_7c_1M0_V0134 : SchedWriteRes<[C1UUnitM0, C1UUnitV0134]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_5c_1V_1M0 : SchedWriteRes<[C1UUnitV, C1UUnitM0]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_8c_2V_2V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV, C1UUnitV0134, C1UUnitV0134]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def C1UWrite_6c_1V_2V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV0134, C1UUnitV0134]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_2V_1V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV, C1UUnitV0134]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_4c_1V_1V0134 : SchedWriteRes<[C1UUnitV, C1UUnitV0134]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_12c_1V0_8rc : SchedWriteRes<[C1UUnitV0]> {
+ let Latency = 12;
+ let ReleaseAtCycles = [8];
+}
+
+def C1UWrite_11c_1V0_4rc : SchedWriteRes<[C1UUnitV0]> {
+ let Latency = 11;
+ let ReleaseAtCycles = [4];
+}
+
+// 13-cycle write on V0 that holds the port for 2 cycles.
+// Latency matches the "13c" in the name (the V1 counterpart
+// C1UWrite_13c_V1_2rc also uses 13).
+def C1UWrite_13c_1V0_2rc : SchedWriteRes<[C1UUnitV0]> {
+ let Latency = 13;
+ let ReleaseAtCycles = [2];
+}
+
+// 12-cycle write on V1 that holds the port for 8 cycles.
+// Bound to C1UUnitV1, as the "1V1" in the name indicates.
+def C1UWrite_12c_1V1_8rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 12;
+ let ReleaseAtCycles = [8];
+}
+
+// 11-cycle write on V1 that holds the port for 4 cycles.
+// Bound to C1UUnitV1, as the "1V1" in the name indicates.
+def C1UWrite_11c_1V1_4rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 11;
+ let ReleaseAtCycles = [4];
+}
+
+// 13-cycle write on V1 that holds the port for 2 cycles.
+// Bound to C1UUnitV1 with Latency 13, matching the "13c" and "1V1" in the
+// name.
+def C1UWrite_13c_1V1_2rc : SchedWriteRes<[C1UUnitV1]> {
+ let Latency = 13;
+ let ReleaseAtCycles = [2];
+}
+
+def C1UWrite_6c_1L_1M : SchedWriteRes<[C1UUnitL, C1UUnitM]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_9c_1V01_1L : SchedWriteRes<[C1UUnitV01, C1UUnitL]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_9c_1I_1L_1V : SchedWriteRes<[C1UUnitI, C1UUnitL, C1UUnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+
+// 10-cycle, 2 micro-op write using one load port and one V port.
+// Latency matches the "10c" in the name.
+def C1UWrite_10c_1L_1V : SchedWriteRes<[C1UUnitL, C1UUnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_11c_1I_1L_1V : SchedWriteRes<[C1UUnitI, C1UUnitL, C1UUnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_2c_1SA_1V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_4c_1I_1V_1SA_1V : SchedWriteRes<[C1UUnitI, C1UUnitV, C1UUnitSA, C1UUnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+
+def C1UWrite_5c_1I_1V_1SA_1V : SchedWriteRes<[C1UUnitI, C1UUnitV, C1UUnitSA, C1UUnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+def C1UWrite_7c_1I_1V_1SA_1V : SchedWriteRes<[C1UUnitI, C1UUnitV, C1UUnitSA, C1UUnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+
+def C1UWrite_2c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_4c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_5c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_6c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_7c_1SA_2V01 : SchedWriteRes<[C1UUnitSA, C1UUnitV01, C1UUnitV01]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+def C1UWrite_3c_1M0_1M : SchedWriteRes<[C1UUnitM0, C1UUnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def C1UWrite_4c_1M0_1M : SchedWriteRes<[C1UUnitM0, C1UUnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// Forwarded types
+def C1UWr_FMA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_FMA : SchedReadAdvance<2, [WriteFMul, C1UWr_FMA]>;
+
+def C1UWr_VA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VA : SchedReadAdvance<3, [C1UWr_VA]>;
+
+def C1UWr_VDOT : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1URd_VDOT : SchedReadAdvance<2, [C1UWr_VDOT]>;
+
+def C1UWr_VMMA : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1URd_VMMA : SchedReadAdvance<2, [C1UWr_VMMA]>;
+
+
+def C1UWr_VMA : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_VMA : SchedReadAdvance<3, [C1UWr_VMA]>;
+
+def C1UWr_VMAH : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_VMAH : SchedReadAdvance<2, [C1UWr_VMAH]>;
+
+def C1UWr_VPA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VPA : SchedReadAdvance<3, [C1UWr_VPA]>;
+
+def C1UWr_VSA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VSA : SchedReadAdvance<3, [C1UWr_VSA]>;
+
+def C1UWr_VFCMA : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_VFCMA : SchedReadAdvance<3, [C1UWr_VFCMA]>;
+
+def C1UWr_VFMA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VFMA : SchedReadAdvance<2, [C1UWr_VFMA]>;
+
+def C1UWr_VBFDOT : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_VBFDOT : SchedReadAdvance<2, [C1UWr_VBFDOT]>;
+
+def C1UWr_VBFMMA : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_VBFMMA : SchedReadAdvance<1, [C1UWr_VBFMMA]>;
+
+def C1UWr_VBFMAL : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_VBFMAL : SchedReadAdvance<3, [C1UWr_VBFMAL]>;
+
+def C1UWr_ZA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZA : SchedReadAdvance<3, [C1UWr_ZA]>;
+def C1UWr_ZPA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZPA : SchedReadAdvance<3, [C1UWr_ZPA]>;
+def C1UWr_ZSA : SchedWriteRes<[C1UUnitV13]> { let Latency = 4; }
+def C1URd_ZSA : SchedReadAdvance<3, [C1UWr_ZSA]>;
+
+def C1UWr_ZDOTB : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1URd_ZDOTB : SchedReadAdvance<2, [C1UWr_ZDOTB]>;
+def C1UWr_ZDOTH : SchedWriteRes<[C1UUnitV0134]> { let Latency = 3; }
+def C1URd_ZDOTH : SchedReadAdvance<2, [C1UWr_ZDOTH]>;
+
+def C1UWr_ZCMABHS : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_ZCMABHS : SchedReadAdvance<3, [C1UWr_ZCMABHS]>;
+
+def C1UWr_ZMMA : SchedWriteRes<[C1UUnitV]> { let Latency = 3; }
+def C1URd_ZMMA : SchedReadAdvance<2, [C1UWr_ZMMA]>;
+
+def C1UWr_ZMA : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_ZMA : SchedReadAdvance<3, [C1UWr_ZMA]>;
+
+def C1UWr_ZMASQL : SchedWriteRes<[C1UUnitV0134]> { let Latency = 4; }
+def C1URd_ZMASQL : SchedReadAdvance<2, [C1UWr_ZMASQL]>;
+
+def C1UWr_ZFCMA : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZFCMA : SchedReadAdvance<3, [C1UWr_ZFCMA]>;
+
+def C1UWr_ZFMA : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZFMA : SchedReadAdvance<2, [C1UWr_ZFMA]>;
+
+def C1UWr_ZFMAL : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZFMAL : SchedReadAdvance<3, [C1UWr_ZFMAL]>;
+
+
+def C1UWr_ZBFDOT : SchedWriteRes<[C1UUnitV]> { let Latency = 4; }
+def C1URd_ZBFDOT : SchedReadAdvance<2, [C1UWr_ZBFDOT]>;
+def C1UWr_ZBFMMA : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZBFMMA : SchedReadAdvance<2, [C1UWr_ZBFMMA]>;
+def C1UWr_ZBFMAL : SchedWriteRes<[C1UUnitV]> { let Latency = 5; }
+def C1URd_ZBFMAL : SchedReadAdvance<3, [C1UWr_ZBFMAL]>;
+
+// Predicate controlled types
+def C1UWrite_ArithI : SchedWriteVariant<[
+ SchedVar<IsCheapLSL, [C1UWrite_1c_1I]>,
+ SchedVar<NoSchedPred, [C1UWrite_2c_1I]>]>;
+
+def C1UWrite_Extr : SchedWriteVariant<[
+ SchedVar<IsRORImmIdiomPred, [C1UWrite_1c_1I]>,
+ SchedVar<NoSchedPred, [C1UWrite_3c_1I_1M]>]>;
+
+def C1UWrite_LdrQ : SchedWriteVariant<[
+ SchedVar<NeoverseQForm, [C1UWrite_7c_1L_1I]>,
+ SchedVar<NoSchedPred, [C1UWrite_6c_1L]>]>;
+
+def C1UWrite_StrQ : SchedWriteVariant<[
+ SchedVar<NeoverseQForm, [C1UWrite_2c_1SA_V01_1I]>,
+ SchedVar<NoSchedPred, [C1UWrite_2c_1SA_V01]>]>;
+
+def C1UWrite_1or2c_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [C1UWrite_1c_1M]>,
+ SchedVar<NoSchedPred, [C1UWrite_2c_1M]>]>;
+
+// V0 write whose latency is selected by the NeoversePdIsPg predicate.
+// NOTE(review): the name says 2 or 3 cycles, but the variants below are the
+// 1-cycle and 2-cycle V0 writes - confirm which latencies are intended.
+def C1UWrite_2or3c_1V0 : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [C1UWrite_1c_1V0]>,
+ SchedVar<NoSchedPred, [C1UWrite_2c_1V0]>]>;
+
+def C1UWrite_2or4c_1SA_2V01 : SchedWriteVariant<[
+ SchedVar<SameZRegDstSrcPred, [C1UWrite_4c_1SA_2V01]>,
+ SchedVar<NoSchedPred, [C1UWrite_2c_1SA_2V01]>]>;
+
+def C1UWrite_4or6c_1SA_2V01 : SchedWriteVariant<[
+ SchedVar<SameZRegDstSrcPred, [C1UWrite_6c_1SA_2V01]>,
+ SchedVar<NoSchedPred, [C1UWrite_4c_1SA_2V01]>]>;
+
+def C1UWrite_5or7c_1SA_2V01 : SchedWriteVariant<[
+ SchedVar<SameZRegDstSrcPred, [C1UWrite_7c_1SA_2V01]>,
+ SchedVar<NoSchedPred, [C1UWrite_5c_1SA_2V01]>]>;
+
+
+def C1UWrite_3or4c_1M0_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [C1UWrite_4c_1M0_1M]>,
+ SchedVar<NoSchedPred, [C1UWrite_3c_1M0_1M]>]>;
+
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction scheduling classes.
+//===----------------------------------------------------------------------===//
+
+// Branch instructions
+// -----------------------------------------------------------------------------
+
+// Branch, immed
+// Compare and branch
+def : SchedAlias<WriteBr, C1UWrite_1c_1B>;
----------------
walkerkd wrote:
Should this be C1UWrite_2c_1B?
Same for the "Branch and link" below?
https://github.com/llvm/llvm-project/pull/182251
More information about the llvm-commits
mailing list