[llvm] [RISC-V] Base scheduling model for tt-ascalon-d8 (PR #120160)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 15:52:40 PST 2024
================
@@ -0,0 +1,333 @@
+//=- RISCVSchedTTAscalonD8.td - Tenstorrent Ascalon Scheduling Defs -----*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+
+def TTAscalonD8Model : SchedMachineModel {
+ let IssueWidth = 8; // 8-way decode and dispatch
+ let MicroOpBufferSize = 256; // 256 micro-op re-order buffer
+ let LoadLatency = 4; // Optimistic load latency
+ let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
+
+ // Enable partial & runtime unrolling.
+ let LoopMicroOpBufferSize = 16;
+
+ let CompleteModel = 0;
+
+ // TODO supported, but haven't added scheduling info yet
+ let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
+ HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
+ HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
+ HasStdExtZkr, HasVInstructions, HasVInstructionsI64];
+}
+
+let SchedModel = TTAscalonD8Model in {
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+let BufferSize = 16 in {
+ def AscalonLS : ProcResource<3>;
+ def AscalonFXA : ProcResource<1>; // ALU, FP/VEC -> INT, MUL, DIV, CSR
+ def AscalonFXB : ProcResource<1>; // ALU, INT -> FP/VEC
+ def AscalonFXC : ProcResource<2>; // ALU, BR
+ def AscalonFXD : ProcResource<2>; // ALU
+ def AscalonFP : ProcResource<2>;
+ def AscalonV : ProcResource<2>;
+}
+
+def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>;
+
+//===----------------------------------------------------------------------===//
+
+// Branching
+def : WriteRes<WriteJmp, [AscalonFXC]>;
+def : WriteRes<WriteJal, [AscalonFXC]>;
+def : WriteRes<WriteJalr, [AscalonFXC]>;
+
+// Integer arithmetic and logic
+def : WriteRes<WriteIALU32, [AscalonFX]>;
+def : WriteRes<WriteIALU, [AscalonFX]>;
+def : WriteRes<WriteShiftImm32, [AscalonFX]>;
+def : WriteRes<WriteShiftImm, [AscalonFX]>;
+def : WriteRes<WriteShiftReg32, [AscalonFX]>;
+def : WriteRes<WriteShiftReg, [AscalonFX]>;
+
+// Integer multiplication
+let Latency = 3 in {
+def : WriteRes<WriteIMul, [AscalonFXA]>;
+def : WriteRes<WriteIMul32, [AscalonFXA]>;
+}
+
+// Integer division
+// Worst case latency is used.
+
+let Latency = 7, ReleaseAtCycles = [7] in {
+ def : WriteRes<WriteIDiv32, [AscalonFXA]>;
+ def : WriteRes<WriteIDiv, [AscalonFXA]>;
+ def : WriteRes<WriteIRem32, [AscalonFXA]>;
+ def : WriteRes<WriteIRem, [AscalonFXA]>;
+}
+
+// Bitmanip
+def : WriteRes<WriteRotateImm, [AscalonFX]>;
+def : WriteRes<WriteRotateImm32, [AscalonFX]>;
+def : WriteRes<WriteRotateReg, [AscalonFX]>;
+def : WriteRes<WriteRotateReg32, [AscalonFX]>;
+
+def : WriteRes<WriteCLZ, [AscalonFX]>;
+def : WriteRes<WriteCLZ32, [AscalonFX]>;
+def : WriteRes<WriteCTZ, [AscalonFX]>;
+def : WriteRes<WriteCTZ32, [AscalonFX]>;
+
+def : WriteRes<WriteCPOP, [AscalonFX]>;
----------------
topperc wrote:
I'm a little surprised CPOP is 1 cycle and on every ALU.
https://github.com/llvm/llvm-project/pull/120160
More information about the llvm-commits
mailing list