[llvm] [AArch64] Initial Olympus scheduling model. (PR #171607)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 10 04:25:49 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Ricardo Jesus (rj-jesus)
<details>
<summary>Changes</summary>
This adds a scheduling model for NVIDIA Olympus based on information from the Olympus CPU Core Software Optimization Guide:
https://docs.nvidia.com/olympus-cpu-core-software-optimization-guide-dp12531-001v0-7.pdf
Co-authored-by: Elvina Yakubova <eyakubova@<!-- -->nvidia.com>
---
Patch is 2.93 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171607.diff
17 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64.td (+3)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+4-3)
- (modified) llvm/lib/Target/AArch64/AArch64Processors.td (+1-1)
- (added) llvm/lib/Target/AArch64/AArch64SchedOlympus.td (+3377)
- (modified) llvm/test/CodeGen/AArch64/rcpc3-sve.ll (+1-1)
- (added) llvm/test/tools/llvm-mca/AArch64/Olympus/basic-extend-instructions.s (+1014)
- (added) llvm/test/tools/llvm-mca/AArch64/Olympus/basic-instructions.s (+3801)
- (added) llvm/test/tools/llvm-mca/AArch64/Olympus/clear-upper-regs.s (+962)
- (added) llvm/test/tools/llvm-mca/AArch64/Olympus/flag-manipulation-instructions.s (+72)
- (added) llvm/test/tools/llvm-mca/AArch64/Olympus/forwarding-idioms.s (+2276)
- (added) llvm/test/tools/llvm-mca/AArch64/Olympus/neon-fp8-instructions.s (+150)
- (added) llvm/test/tools/llvm-mca/AArch64/Olympus/neon-instructions.s (+5310)
- (added) llvm/test/tools/llvm-mca/AArch64/Olympus/sve-fp8-instructions.s (+123)
- (added) llvm/test/tools/llvm-mca/AArch64/Olympus/sve-instructions.s (+10680)
- (added) llvm/test/tools/llvm-mca/AArch64/Olympus/writeback-loads-stores.s (+3983)
- (added) llvm/test/tools/llvm-mca/AArch64/Olympus/zero-mov-idioms.s (+90)
- (modified) llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp (+4)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 1a4367b84353b..db3b19fcf43dd 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -65,6 +65,7 @@ def SVE2p1Unsupported : AArch64Unsupported;
def SVE2Unsupported : AArch64Unsupported {
let F = !listconcat([HasSVE2, HasSVE2_or_SME, HasNonStreamingSVE2_or_SME2, HasSSVE_FP8FMA, HasSMEF8F16,
+ HasSSVE_FP8DOT2, HasSSVE_FP8DOT4,
HasSMEF8F32, HasSVEAES, HasSVESHA3, HasSVESM4, HasSVEBitPerm,
HasSVEB16B16],
SVE2p1Unsupported.F);
@@ -95,6 +96,7 @@ def SME2p1Unsupported : AArch64Unsupported {
def SME2Unsupported : AArch64Unsupported {
let F = !listconcat([HasSME2, HasNonStreamingSVE2_or_SME2, HasSVE2p1_or_SME2, HasSSVE_FP8FMA,
+ HasSSVE_FP8DOT2, HasSSVE_FP8DOT4,
HasSMEF8F16, HasSMEF8F32, HasSMEF16F16_or_SMEF8F16, HasSMEB16B16,
HasNonStreamingSVE_or_SSVE_AES, HasSVE2p1_or_StreamingSME2],
SME2p1Unsupported.F);
@@ -137,6 +139,7 @@ include "AArch64SchedNeoverseV1.td"
include "AArch64SchedNeoverseV2.td"
include "AArch64SchedNeoverseV3.td"
include "AArch64SchedNeoverseV3AE.td"
+include "AArch64SchedOlympus.td"
include "AArch64SchedOryon.td"
include "AArch64Processors.td"
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4d2e740779961..e84d71dc2c7c7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -2075,7 +2075,7 @@ class SpecialReturn<bits<4> opc, string asm>
let mayLoad = 1 in
class RCPCLoad<bits<2> sz, string asm, RegisterClass RC>
: I<(outs RC:$Rt), (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]", "", []>,
- Sched<[]> {
+ Sched<[WriteLD]> {
bits<5> Rn;
bits<5> Rt;
let Inst{31-30} = sz;
@@ -6559,7 +6559,8 @@ class BaseSIMDThreeVectors<bit Q, bit U, bits<2> size, bits<4> op,
RegisterOperand regtype2, string asm,
string kind1, string kind2>
: I<(outs regtype1:$Rd), (ins regtype2:$Rn, regtype2:$Rm), asm,
- "\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2, "", []>, Sched<[]> {
+ "\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2, "", []>,
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -8593,7 +8594,7 @@ class BaseSIMDTableLookupIndexed<bit Q, bits<5> opc,
: I<(outs V128:$Rd),
(ins listtype:$Rn, V128:$Rm, idx_type:$idx),
asm, "\t$Rd" # kind # ", $Rn, $Rm$idx", "", []>,
- Sched<[]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 72882ac078c55..b21898a7d492f 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -1397,7 +1397,7 @@ def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel,
[TuneCarmel]>;
// NVIDIA Olympus
-def : ProcessorModel<"olympus", NeoverseV2Model, ProcessorFeatures.Olympus,
+def : ProcessorModel<"olympus", OlympusModel, ProcessorFeatures.Olympus,
[TuneOlympus]>;
// Ampere Computing
diff --git a/llvm/lib/Target/AArch64/AArch64SchedOlympus.td b/llvm/lib/Target/AArch64/AArch64SchedOlympus.td
new file mode 100644
index 0000000000000..f10ba7143d32a
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedOlympus.td
@@ -0,0 +1,3377 @@
+//=- AArch64SchedOlympus.td - Olympus Scheduling Defs --------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the NVIDIA Olympus processors.
+// All information is taken from the Olympus Software Optimisation guide:
+//
+// https://docs.nvidia.com/olympus-cpu-core-software-optimization-guide-dp12531-001v0-7.pdf
+//
+//===----------------------------------------------------------------------===//
+
+def OlympusModel : SchedMachineModel {
+ // NOTE: Unless otherwise stated, values unspecified in the Olympus SWOG are
+ // copied from the Neoverse V2 model.
+ let IssueWidth = 10; // Maximum macro-ops dispatched per cycle.
+ let MicroOpBufferSize = 320; // Entries in micro-op re-order buffer.
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 10; // Extra cycles for mispredicted branch.
+ let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
+ [HasSVE2p1, HasSVEB16B16,
+ HasCPA, HasCSSC,
+ HasMatMulFP64]);
+}
+
+//===----------------------------------------------------------------------===//
+// In the Olympus core, instructions are first fetched and decoded into
+// internal macro-ops (MOps). Those MOps are then renamed and dispatched to the
+// out-of-order portion of the core. A MOp can be split into two micro-ops
+// (uOps) further down the pipeline after instruction decode. Once dispatched,
+// a uOp waits for its operands to become available and issues out-of-order to
+// one of many execution pipelines. Each execution pipeline can accept one uOp
+// per cycle.
+
+let SchedModel = OlympusModel in {
+
+// Note: The RCU is not specified in the SWOG, therefore we assume we can commit
+// as many MOps as we can dispatch each cycle.
+def OlympusRCU : RetireControlUnit<OlympusModel.MicroOpBufferSize, 10>;
+
+// Define the issue ports.
+def OlympusUnitB : ProcResource<4>; // Branch 0/1/2/3
+def OlympusUnitS : ProcResource<6>; // Integer single-cycle 0/1/2/3/4/5
+def OlympusUnitM0 : ProcResource<1>; // Integer single/multicycle 0
+def OlympusUnitM1 : ProcResource<1>; // Integer single/multicycle 1
+def OlympusUnitL : ProcResource<4>; // Load 0/1/2/3
+def OlympusUnitSA : ProcResource<2>; // Store 0/1
+def OlympusUnitD : ProcResource<2>; // Integer store data 0/1
+def OlympusUnitV0 : ProcResource<1>; // FP/ASIMD 0
+def OlympusUnitV1 : ProcResource<1>; // FP/ASIMD 1
+def OlympusUnitV2 : ProcResource<1>; // FP/ASIMD 2
+def OlympusUnitV3 : ProcResource<1>; // FP/ASIMD 3
+def OlympusUnitV45 : ProcResource<2>; // FP/ASIMD 4/5
+def OlympusUnitF : ProcResource<6>; // Flags
+
+def OlympusUnitM : ProcResGroup<[OlympusUnitM0, OlympusUnitM1]>; // Integer single/multicycle 0/1
+def OlympusUnitI : ProcResGroup<[OlympusUnitS, OlympusUnitM0, OlympusUnitM1]>; // Integer single-cycle 0/1/2/3/4/5 and single/multicycle 0/1
+def OlympusUnitV03 : ProcResGroup<[OlympusUnitV0, OlympusUnitV3]>; // FP/ASIMD 0/3
+def OlympusUnitV12 : ProcResGroup<[OlympusUnitV1, OlympusUnitV2]>; // FP/ASIMD 1/2
+def OlympusUnitV0123 : ProcResGroup<[OlympusUnitV0, OlympusUnitV1, OlympusUnitV2, OlympusUnitV3]>; // FP/ASIMD 0/1/2/3 (also used for vector store data)
+def OlympusUnitV : ProcResGroup<[OlympusUnitV0, OlympusUnitV1, OlympusUnitV2, OlympusUnitV3, OlympusUnitV45]>; // FP/ASIMD 0/1/2/3/4/5
+
+// No forwarding is provided for these types.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Olympus.
+
+// Define generic 0 micro-op types
+
+let NumMicroOps = 0 in {
+ def OlympusWrite_0c : SchedWriteRes<[]> { let Latency = 0; }
+ def OlympusWrite_6c : SchedWriteRes<[]> { let Latency = 6; }
+} // NumMicroOps = 0
+
+// Define generic 1 micro-op types
+
+def OlympusWrite_1c_1B : SchedWriteRes<[OlympusUnitB]> { let Latency = 1; }
+def OlympusWrite_1c_1I : SchedWriteRes<[OlympusUnitI]> { let Latency = 1; }
+def OlympusWrite_1c_1M : SchedWriteRes<[OlympusUnitM]> { let Latency = 1; }
+def OlympusWrite_1c_1M0 : SchedWriteRes<[OlympusUnitM0]> { let Latency = 1; }
+def OlympusWrite_1c_1L : SchedWriteRes<[OlympusUnitL]> { let Latency = 1; }
+def OlympusWrite_2c_1M : SchedWriteRes<[OlympusUnitM]> { let Latency = 2; }
+def OlympusWrite_2c_1M0 : SchedWriteRes<[OlympusUnitM0]> { let Latency = 2; }
+def OlympusWrite_2c_1V : SchedWriteRes<[OlympusUnitV]> { let Latency = 2; }
+def OlympusWrite_2c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 2; }
+def OlympusWrite_2c_1V0123 : SchedWriteRes<[OlympusUnitV0123]> { let Latency = 2; }
+def OlympusWrite_2c_1V03 : SchedWriteRes<[OlympusUnitV03]> { let Latency = 2; }
+def OlympusWrite_2c_1V1 : SchedWriteRes<[OlympusUnitV1]> { let Latency = 2; }
+def OlympusWrite_3c_1M : SchedWriteRes<[OlympusUnitM]> { let Latency = 3; }
+def OlympusWrite_3c_1M0 : SchedWriteRes<[OlympusUnitM0]> { let Latency = 3; }
+def OlympusWrite_3c_1V : SchedWriteRes<[OlympusUnitV]> { let Latency = 3; }
+def OlympusWrite_3c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 3; }
+def OlympusWrite_3c_1V0123 : SchedWriteRes<[OlympusUnitV0123]> { let Latency = 3; }
+def OlympusWrite_3c_1V03 : SchedWriteRes<[OlympusUnitV03]> { let Latency = 3; }
+def OlympusWrite_3c_1V1 : SchedWriteRes<[OlympusUnitV1]> { let Latency = 3; }
+def OlympusWrite_3c_1V12 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 3; }
+def OlympusWrite_4c_1M0 : SchedWriteRes<[OlympusUnitM0]> { let Latency = 4; }
+def OlympusWrite_4c_1L : SchedWriteRes<[OlympusUnitL]> { let Latency = 4; }
+def OlympusWrite_4c_1V : SchedWriteRes<[OlympusUnitV]> { let Latency = 4; }
+def OlympusWrite_4c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 4; }
+def OlympusWrite_4c_1V0123 : SchedWriteRes<[OlympusUnitV0123]> { let Latency = 4; }
+def OlympusWrite_4c_1V03 : SchedWriteRes<[OlympusUnitV03]> { let Latency = 4; }
+def OlympusWrite_4c_1V1 : SchedWriteRes<[OlympusUnitV1]> { let Latency = 4; }
+def OlympusWrite_5c_1V : SchedWriteRes<[OlympusUnitV]> { let Latency = 5; }
+def OlympusWrite_6c_1L : SchedWriteRes<[OlympusUnitL]> { let Latency = 6; }
+def OlympusWrite_6c_1V : SchedWriteRes<[OlympusUnitV]> { let Latency = 6; }
+def OlympusWrite_6c_1V1 : SchedWriteRes<[OlympusUnitV1]> { let Latency = 6; }
+def OlympusWrite_6c_1V12 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 6; }
+def OlympusWrite_7c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 7; let ReleaseAtCycles = [7]; }
+def OlympusWrite_8c_1V12 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 8; }
+def OlympusWrite_9c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 9; }
+def OlympusWrite_10c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 10; }
+def OlympusWrite_10c_1V1 : SchedWriteRes<[OlympusUnitV1]> { let Latency = 10; }
+def OlympusWrite_12c_1M : SchedWriteRes<[OlympusUnitM]> { let Latency = 12; let ReleaseAtCycles = [12]; }
+def OlympusWrite_12c_1V : SchedWriteRes<[OlympusUnitV]> { let Latency = 12; }
+def OlympusWrite_12c_1V45 : SchedWriteRes<[OlympusUnitV45]> { let Latency = 12; let ReleaseAtCycles = [12]; }
+def OlympusWrite_13c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 13; }
+def OlympusWrite_13c_1V12 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 13; }
+def OlympusWrite_15c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 15; }
+def OlympusWrite_16c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 16; }
+def OlympusWrite_20c_1M : SchedWriteRes<[OlympusUnitM]> { let Latency = 20; let ReleaseAtCycles = [20]; }
+def OlympusWrite_20c_1V45 : SchedWriteRes<[OlympusUnitV45]> { let Latency = 20; let ReleaseAtCycles = [20]; }
+
+// These types are multi-pumped.
+def OlympusWrite_4c_1V0123_2 : SchedWriteRes<[OlympusUnitV0123]> { let Latency = 4; let ReleaseAtCycles = [2]; }
+def OlympusWrite_5c_1V0123_2 : SchedWriteRes<[OlympusUnitV0123]> { let Latency = 5; let ReleaseAtCycles = [2]; }
+def OlympusWrite_6c_1V0123_4 : SchedWriteRes<[OlympusUnitV0123]> { let Latency = 6; let ReleaseAtCycles = [4]; }
+def OlympusWrite_7c_1V03_6 : SchedWriteRes<[OlympusUnitV03]> { let Latency = 7; let ReleaseAtCycles = [6]; }
+def OlympusWrite_9c_1V12_2 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 9; let ReleaseAtCycles = [2]; }
+def OlympusWrite_9c_1V12_4 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 9; let ReleaseAtCycles = [4]; }
+def OlympusWrite_11c_1V03_10 : SchedWriteRes<[OlympusUnitV03]> { let Latency = 11; let ReleaseAtCycles = [10]; }
+def OlympusWrite_11c_1V12_4 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 11; let ReleaseAtCycles = [4]; }
+def OlympusWrite_13c_1V12_8 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 13; let ReleaseAtCycles = [8]; }
+def OlympusWrite_14c_1V12_2 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 14; let ReleaseAtCycles = [2]; }
+
+// Hack to get the flagset throughputs right.
+def OlympusWrite_1c_1F : SchedWriteRes<[OlympusUnitI, OlympusUnitF]> { let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Define generic 2 micro-op types
+
+let NumMicroOps = 2 in {
+
+ def OlympusWrite_1c_1I_1B : SchedWriteRes<[OlympusUnitI, OlympusUnitB]> { let Latency = 1; }
+ def OlympusWrite_1c_1M0_1M : SchedWriteRes<[OlympusUnitM0, OlympusUnitM]> { let Latency = 1; }
+ def OlympusWrite_1c_1SA_1D : SchedWriteRes<[OlympusUnitSA, OlympusUnitD]> { let Latency = 1; }
+ def OlympusWrite_1c_2M : SchedWriteRes<[OlympusUnitM, OlympusUnitM]> { let Latency = 1; }
+ def OlympusWrite_2c_1L_1V : SchedWriteRes<[OlympusUnitL, OlympusUnitV]> { let Latency = 2; }
+ def OlympusWrite_2c_1M0_1M : SchedWriteRes<[OlympusUnitM0, OlympusUnitM]> { let Latency = 2; }
+ def OlympusWrite_2c_1M_1V03 : SchedWriteRes<[OlympusUnitV03, OlympusUnitM]> { let Latency = 2; }
+ def OlympusWrite_2c_1V0_1M : SchedWriteRes<[OlympusUnitV0, OlympusUnitM]> { let Latency = 2; }
+ def OlympusWrite_3c_1I_1M : SchedWriteRes<[OlympusUnitI, OlympusUnitM]> { let Latency = 3; }
+ def OlympusWrite_3c_1M_1M0 : SchedWriteRes<[OlympusUnitM, OlympusUnitM0]> { let Latency = 3; }
+ def OlympusWrite_3c_1M_1V03 : SchedWriteRes<[OlympusUnitV03, OlympusUnitM]> { let Latency = 3; }
+ def OlympusWrite_3c_1SA_1V0123 : SchedWriteRes<[OlympusUnitSA, OlympusUnitV0123]> { let Latency = 3; }
+ def OlympusWrite_3c_1V0_1M : SchedWriteRes<[OlympusUnitV0, OlympusUnitM]> { let Latency = 3; }
+ def OlympusWrite_3c_2V03 : SchedWriteRes<[OlympusUnitV03, OlympusUnitV03]> { let Latency = 3; }
+ def OlympusWrite_3c_2M : SchedWriteRes<[OlympusUnitM, OlympusUnitM]> { let Latency = 3; }
+ def OlympusWrite_4c_1L_1V : SchedWriteRes<[OlympusUnitL, OlympusUnitV]> { let Latency = 4; }
+ def OlympusWrite_4c_1M_1M0 : SchedWriteRes<[OlympusUnitM, OlympusUnitM0]> { let Latency = 4; }
+ def OlympusWrite_4c_1SA_1D : SchedWriteRes<[OlympusUnitSA, OlympusUnitD]> { let Latency = 4; }
+ def OlympusWrite_4c_2M : SchedWriteRes<[OlympusUnitM, OlympusUnitM]> { let Latency = 4; }
+ def OlympusWrite_4c_2V : SchedWriteRes<[OlympusUnitV, OlympusUnitV]> { let Latency = 4; }
+ def OlympusWrite_4c_2V0 : SchedWriteRes<[OlympusUnitV0, OlympusUnitV0]> { let Latency = 4; }
+ def OlympusWrite_5c_1B_1M0 : SchedWriteRes<[OlympusUnitB, OlympusUnitM0]> { let Latency = 5; }
+ def OlympusWrite_5c_1I_1L : SchedWriteRes<[OlympusUnitI, OlympusUnitL]> { let Latency = 5; }
+ def OlympusWrite_5c_1L_1F : SchedWriteRes<[OlympusUnitL, OlympusUnitF]> { let Latency = 5; }
+ def OlympusWrite_5c_1M0_1V : SchedWriteRes<[OlympusUnitM0, OlympusUnitV]> { let Latency = 5; }
+ def OlympusWrite_5c_1M_1L : SchedWriteRes<[OlympusUnitM, OlympusUnitL]> { let Latency = 5; }
+ def OlympusWrite_5c_1M_1V : SchedWriteRes<[OlympusUnitM, OlympusUnitV]> { let Latency = 5; }
+ def OlympusWrite_5c_2V : SchedWriteRes<[OlympusUnitV, OlympusUnitV]> { let Latency = 5; }
+ def OlympusWrite_5c_2V0 : SchedWriteRes<[OlympusUnitV0, OlympusUnitV0]> { let Latency = 5; }
+ def OlympusWrite_5c_1V_1V0123 : SchedWriteRes<[OlympusUnitV, OlympusUnitV0123]> { let Latency = 5; }
+ def OlympusWrite_6c_1I_1L : SchedWriteRes<[OlympusUnitI, OlympusUnitL]> { let Latency = 6; }
+ def OlympusWrite_6c_1L_1S : SchedWriteRes<[OlympusUnitL, OlympusUnitS]> { let Latency = 6; }
+ def OlympusWrite_6c_1V03_1V12 : SchedWriteRes<[OlympusUnitV03, OlympusUnitV12]> { let Latency = 6; }
+ def OlympusWrite_6c_1V1_1M0 : SchedWriteRes<[OlympusUnitV1, OlympusUnitM0]> { let Latency = 6; }
+ def OlympusWrite_6c_1V_1V0123 : SchedWriteRes<[OlympusUnitV, OlympusUnitV0123]> { let Latency = 6; }
+ def OlympusWrite_6c_2L : SchedWriteRes<[OlympusUnitL, OlympusUnitL]> { let Latency = 6; }
+ def OlympusWrite_6c_2V : SchedWriteRes<[OlympusUnitV, OlympusUnitV]> { let Latency = 6; }
+ def OlympusWrite_6c_2V12 : SchedWriteRes<[OlympusUnitV12, OlympusUnitV12]> { let Latency = 6; }
+ def OlympusWrite_6c_2V0123 : SchedWriteRes<[OlympusUnitV0123, OlympusUnitV0123]> { let Latency = 6; }
+ def OlympusWrite_7c_1F_1L : SchedWriteRes<[OlympusUnitF, OlympusUnitL]> { let Latency = 7; }
+ def OlympusWrite_7c_1I_1L : SchedWriteRes<[OlympusUnitI, OlympusUnitL]> { let Latency = 7; }
+ def OlympusWrite_7c_1M_1V0123 : SchedWriteRes<[OlympusUnitM, OlympusUnitV0123]> { let Latency = 7; }
+ def OlympusWrite_8c_1L_1V : SchedWriteRes<[OlympusUnitL, OlympusUnitV]> { let Latency = 8; }
+ def OlympusWrite_8c_1M0_1L : SchedWriteRes<[OlympusUnitM0, OlympusUnitL]> { let Latency = 8; }
+
+ // These types are multi-pumped.
+ def OlympusWrite_8c_1M_1V0123_2 : SchedWriteRes<[OlympusUnitM, OlympusUnitV0123]> { let Latency = 8; let ReleaseAtCycles = [1, 2]; }
+
+} // NumMicroOps = 2
+
+//===----------------------------------------------------------------------===//
+// Define generic 3 micro-op types
+
+let NumMicroOps = 3 in {
+
+ def OlympusWrite_2c_1L_1S_1V : SchedWriteRes<[OlympusUnitL, OlympusUnitS, OlympusUnitV]> { let Latency = 2; }
+ def OlympusWrite_2c_1SA_1D_1I : SchedWriteRes<[OlympusUnitSA, OlympusUnitD, OlympusUnitI]> { let Latency = 2; }
+ def OlympusWrite_3c_1I_1SA_1V0123 : SchedWriteRes<[OlympusUnitI, OlympusUnitSA, OlympusUnitV0123]> { let Latency = 3; }
+ def OlympusWrite_4c_3V : SchedWriteRes<[OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 4; }
+ def OlympusWrite_5c_1I_1B_1M0 : SchedWriteRes<[OlympusUnitI, OlympusUnitB, OlympusUnitM0]> { let Latency = 5; }
+ def OlympusWrite_5c_1SA_1V_1V0123 : SchedWriteRes<[OlympusUnitSA, OlympusUnitV, OlympusUnitV0123]> { let Latency = 5; }
+ def OlympusWrite_6c_3L : SchedWriteRes<[OlympusUnitL, OlympusUnitL, OlympusUnitL]> { let Latency = 6; }
+ def OlympusWrite_6c_3V : SchedWriteRes<[OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 6;...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/171607
More information about the llvm-commits mailing list