[clang] [llvm] [RISCV] Add a generic OOO CPU (PR #120712)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 2 08:05:04 PST 2025
================
@@ -0,0 +1,494 @@
+//===-- RISCVSchedGenericOOO.td - Generic O3 Processor -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// We assume that:
+// * 6-issue out-of-order CPU with 192 ROB entries.
+// * Units:
+// * IXU (Integer ALU Unit): 4 units, only one can execute division.
+// * FXU (Floating-point Unit): 2 units.
+// * LSU (Load/Store Unit): 2 units.
+// * VXU (Vector Unit): 1 unit.
+// * Latency:
+// * Integer instructions: 1 cycle.
+// * Multiplication instructions: 4 cycles.
+// * Division/remainder instructions: 13-21 cycles.
+// * Floating-point instructions: 2-6 cycles where given explicitly (division takes longer).
+// * Vector instructions: 2-6 cycles.
+// * Load/Store:
+// * IXU: 4 cycles.
+// * FXU: 6 cycles.
+// * VXU: 6 cycles.
+// * Integer/floating-point/vector div/rem/sqrt/... are non-pipelined.
+//===----------------------------------------------------------------------===//
+
+// Machine model for the generic out-of-order core described above: 6-wide
+// issue with a 192-entry micro-op buffer (the ROB size from the header).
+// The fields inherited from SchedMachineModel are overridden with `let`
+// instead of being redeclared with a type, matching the CompleteModel line.
+def GenericOOOModel : SchedMachineModel {
+ let IssueWidth = 6;
+ let MicroOpBufferSize = 192;
+ let LoadLatency = 4;
+ let MispredictPenalty = 8;
+ // The model is marked incomplete; vector scheduling is still a TODO in
+ // this file.
+ let CompleteModel = 0;
+}
+
+let SchedModel = GenericOOOModel in {
+//===----------------------------------------------------------------------===//
+// Resource groups
+//===----------------------------------------------------------------------===//
+// Integer side: one divide-capable unit plus three further integer units.
+// GenericOOOALU groups both resources, giving the four integer units listed
+// in the header.
+def GenericOOODIV : ProcResource<1>;
+def GenericOOOIXU : ProcResource<3>;
+def GenericOOOALU : ProcResGroup<[GenericOOODIV, GenericOOOIXU]>;
+// Two load/store units and two floating-point units.
+def GenericOOOLSU : ProcResource<2>;
+def GenericOOOFPU : ProcResource<2>;
+// TODO: Add vector scheduling.
+// def GenericOOOVXU : ProcResource<1>;
+
+//===----------------------------------------------------------------------===//
+// Branches
+//===----------------------------------------------------------------------===//
+// All three jump/branch writes are mapped to the integer ALU group; no
+// Latency or ReleaseAtCycles is specified for them here.
+def : WriteRes<WriteJmp, [GenericOOOALU]>;
+def : WriteRes<WriteJalr, [GenericOOOALU]>;
+def : WriteRes<WriteJal, [GenericOOOALU]>;
+
+//===----------------------------------------------------------------------===//
+// Integer arithmetic and logic
+//===----------------------------------------------------------------------===//
+// ALU and shift writes (including the *32 variants) are mapped to the integer
+// ALU group without a Latency override; the header lists integer instructions
+// as 1 cycle.
+def : WriteRes<WriteIALU, [GenericOOOALU]>;
+def : WriteRes<WriteIALU32, [GenericOOOALU]>;
+def : WriteRes<WriteShiftImm, [GenericOOOALU]>;
+def : WriteRes<WriteShiftImm32, [GenericOOOALU]>;
+def : WriteRes<WriteShiftReg, [GenericOOOALU]>;
+def : WriteRes<WriteShiftReg32, [GenericOOOALU]>;
+
+//===----------------------------------------------------------------------===//
+// Integer multiplication
+//===----------------------------------------------------------------------===//
+// Multiplies use the integer ALU group with a 4-cycle latency. No
+// ReleaseAtCycles is set, unlike the division writes.
+let Latency = 4 in {
+ def : WriteRes<WriteIMul, [GenericOOOALU]>;
+ def : WriteRes<WriteIMul32, [GenericOOOALU]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer division
+//===----------------------------------------------------------------------===//
+// Division and remainder are restricted to the single GenericOOODIV unit.
+// ReleaseAtCycles is set equal to Latency for every write: 13 cycles for the
+// *32 writes, 21 cycles for the others.
+let Latency = 13, ReleaseAtCycles = [13] in
+def : WriteRes<WriteIDiv32, [GenericOOODIV]>;
+let Latency = 21, ReleaseAtCycles = [21] in
+def : WriteRes<WriteIDiv, [GenericOOODIV]>;
+let Latency = 13, ReleaseAtCycles = [13] in
+def : WriteRes<WriteIRem32, [GenericOOODIV]>;
+let Latency = 21, ReleaseAtCycles = [21] in
+def : WriteRes<WriteIRem, [GenericOOODIV]>;
+
+//===----------------------------------------------------------------------===//
+// Integer memory
+//===----------------------------------------------------------------------===//
+// Load: every integer load write uses the LSU with a 4-cycle latency, the
+// same value as LoadLatency in GenericOOOModel.
+let Latency = 4 in {
+ def : WriteRes<WriteLDB, [GenericOOOLSU]>;
+ def : WriteRes<WriteLDH, [GenericOOOLSU]>;
+ def : WriteRes<WriteLDW, [GenericOOOLSU]>;
+ def : WriteRes<WriteLDD, [GenericOOOLSU]>;
+}
+
+// Store: store writes also use the LSU; no Latency is given for them here.
+def : WriteRes<WriteSTB, [GenericOOOLSU]>;
+def : WriteRes<WriteSTH, [GenericOOOLSU]>;
+def : WriteRes<WriteSTW, [GenericOOOLSU]>;
+def : WriteRes<WriteSTD, [GenericOOOLSU]>;
+
+//===----------------------------------------------------------------------===//
+// Atomic
+//===----------------------------------------------------------------------===//
+// Atomic load writes: LSU, 4 cycles (same as the plain integer loads).
+let Latency = 4 in {
+ def : WriteRes<WriteAtomicLDW, [GenericOOOLSU]>;
+ def : WriteRes<WriteAtomicLDD, [GenericOOOLSU]>;
+}
+
+// WriteAtomicW/WriteAtomicD: LSU, 5 cycles.
+let Latency = 5 in {
+ def : WriteRes<WriteAtomicW, [GenericOOOLSU]>;
+ def : WriteRes<WriteAtomicD, [GenericOOOLSU]>;
+}
+
+// Atomic store writes: LSU, no Latency override.
+def : WriteRes<WriteAtomicSTW, [GenericOOOLSU]>;
+def : WriteRes<WriteAtomicSTD, [GenericOOOLSU]>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point
+//===----------------------------------------------------------------------===//
+// Floating-point load: handled by the LSU with a 6-cycle latency (two cycles
+// more than the integer loads).
+let Latency = 6 in {
+ def : WriteRes<WriteFLD32, [GenericOOOLSU]>;
+ def : WriteRes<WriteFLD64, [GenericOOOLSU]>;
+}
+
+// Floating-point store: handled by the LSU; no Latency is given here.
+def : WriteRes<WriteFST32, [GenericOOOLSU]>;
+def : WriteRes<WriteFST64, [GenericOOOLSU]>;
+
+// Arithmetic and logic: all writes below use the FPU resource.
+// Add: 4 cycles.
+let Latency = 4 in {
+ def : WriteRes<WriteFAdd32, [GenericOOOFPU]>;
+ def : WriteRes<WriteFAdd64, [GenericOOOFPU]>;
+}
+
+// Multiply: 5 cycles.
+let Latency = 5 in {
+ def : WriteRes<WriteFMul32, [GenericOOOFPU]>;
+ def : WriteRes<WriteFMul64, [GenericOOOFPU]>;
+}
+
+// Fused multiply-add: 6 cycles.
+let Latency = 6 in {
+ def : WriteRes<WriteFMA32, [GenericOOOFPU]>;
+ def : WriteRes<WriteFMA64, [GenericOOOFPU]>;
+}
+
+// Sign-injection and min/max: no Latency override is given here.
+def : WriteRes<WriteFSGNJ32, [GenericOOOFPU]>;
+def : WriteRes<WriteFSGNJ64, [GenericOOOFPU]>;
+def : WriteRes<WriteFMinMax32, [GenericOOOFPU]>;
+def : WriteRes<WriteFMinMax64, [GenericOOOFPU]>;
+
+// Compare: FPU, 2 cycles for both the 32-bit and 64-bit writes.
+let Latency = 2 in {
+ def : WriteRes<WriteFCmp32, [GenericOOOFPU]>;
+ def : WriteRes<WriteFCmp64, [GenericOOOFPU]>;
+}
+
+// Division
+let Latency = 13, ReleaseAtCycles = [13] in {
----------------
preames wrote:
Doesn't ReleaseAtCycles imply a non-pipelined implementation (i.e. one that can only issue once every N cycles, rather than being able to issue every cycle with an N-cycle delay before the result is available)? That seems odd for an assumed moderately high-end OOO core.
https://github.com/llvm/llvm-project/pull/120712
More information about the llvm-commits
mailing list