[llvm] [RISCV] Add SiFiveP600Model SchedModel that is used by sifive-p670 (PR #84962)

Tue Mar 12 13:27:11 PDT 2024

================
@@ -0,0 +1,1029 @@
+//==- RISCVSchedSiFiveP600.td - SiFiveP600 Scheduling Defs ---*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+
+/// c is true if mx has the worst-case behavior compared to LMULs in MxList.
+/// On the SiFiveP600, the worst-case LMUL is the largest LMUL
+/// and the worst-case SEW is the smallest SEW for that LMUL.
+class SiFiveP600IsWorstCaseMX<string mx, list<string> MxList> {
+  // Result of LargestLMUL<MxList>; mx is the worst case iff it equals this.
+  string LLMUL = LargestLMUL<MxList>.r;
+  bit c = !eq(mx, LLMUL);
+}
+
+/// c is true only if mx equals the result of LargestLMUL<MxList> and sew
+/// equals the result of SmallestSEW<mx, isF>, i.e. both the LMUL and the SEW
+/// are at their worst case. isF is forwarded unchanged to SmallestSEW.
+class SiFiveP600IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> {
+  string LLMUL = LargestLMUL<MxList>.r;
+  int SSEW = SmallestSEW<mx, isF>.r;
+  // Both conditions must hold for the (mx, sew) pair to be the worst case.
+  bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
+}
+
+// 1 Micro-Op per cycle.
+// c maps an LMUL string to a cycle count: M1/M2/M4/M8 yield 1/2/4/8, and
+// every fractional LMUL (MF2, MF4, MF8) yields 1.
+class SiFiveP600GetLMulCycles<string mx> {
+  int c = !cond(
+    !eq(mx, "M1") : 1,
+    !eq(mx, "M2") : 2,
+    !eq(mx, "M4") : 4,
+    !eq(mx, "M8") : 8,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+}
+
+// Latency for segmented loads and stores is calculated as vl * nf, where vl
+// is taken at its upper bound for the given LMUL (mx) and SEW (sew).
+class SiFiveP600GetCyclesSegmented<string mx, int sew, int nf> {
+  // VLEN value this computation is based on.
+  defvar VLEN = 128;
+  // Upper bound on vl: VLEN scaled by the LMUL (multiplied for M2/M4/M8,
+  // divided for MF2/MF4/MF8), then divided by sew.
+  defvar VLUpperBound = !cond(
+    !eq(mx, "M1") : !div(VLEN, sew),
+    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
+    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
+    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
+    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
+    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
+    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
+  );
+  int c = !mul(VLUpperBound, nf);
+}
+
+// SiFiveP600 machine model for scheduling and other instruction cost heuristics.
+def SiFiveP600Model : SchedMachineModel {
+  let IssueWidth = 4;         // 4 micro-ops are dispatched per cycle.
+  let MicroOpBufferSize = 160; // Max micro-ops that can be buffered.
+  let LoadLatency = 4;        // Cycles for loads to access the cache.
+  let MispredictPenalty = 9;  // Extra cycles for a mispredicted branch.
+  let PostRAScheduler = true;
+  // Extensions that this model declares as unsupported.
+  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
+                             HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
+                             HasStdExtZksed, HasStdExtZksh, HasStdExtZkr,
+                             HasVendorXSfvqmaccqoq];
+  // NOTE(review): the model is explicitly marked incomplete; presumably not
+  // every instruction has scheduling information yet -- confirm.
+  let CompleteModel = false;
+}
+
+let SchedModel = SiFiveP600Model in {
+
+// Integer (IEXQ0-3) and floating-point (FEXQ0-1) execution queues, one unit
+// each.
+def SiFiveP600IEXQ0       : ProcResource<1>;
+def SiFiveP600IEXQ1       : ProcResource<1>;
+def SiFiveP600IEXQ2       : ProcResource<1>;
+def SiFiveP600IEXQ3       : ProcResource<1>;
+def SiFiveP600FEXQ0       : ProcResource<1>;
+def SiFiveP600FEXQ1       : ProcResource<1>;
+
+// Two Load/Store ports that can issue either two loads, two stores, or one load
+// and one store (P550 has one load and one separate store pipe).
+def SiFiveP600LDST       : ProcResource<2>;
+
+// 4-wide pipeline with 4 ALU pipes.
+def SiFiveP600IntArith    : ProcResGroup<[SiFiveP600IEXQ0, SiFiveP600IEXQ1, SiFiveP600IEXQ2, SiFiveP600IEXQ3]>;
+// The defvars below are aliases for a single integer pipe: SYS and CMOV both
+// name IEXQ0, MulI2F names IEXQ1. Branch is the group of IEXQ2 and IEXQ3.
+defvar SiFiveP600SYS      = SiFiveP600IEXQ0;
+defvar SiFiveP600CMOV     = SiFiveP600IEXQ0;
+defvar SiFiveP600MulI2F   = SiFiveP600IEXQ1;
+def SiFiveP600Branch      : ProcResGroup<[SiFiveP600IEXQ2, SiFiveP600IEXQ3]>;
+def SiFiveP600Div         : ProcResource<1>;
+
+// Floating-point arithmetic can use either FP queue; F2I is an alias for FEXQ0.
+def SiFiveP600FloatArith  : ProcResGroup<[SiFiveP600FEXQ0, SiFiveP600FEXQ1]>;
+defvar SiFiveP600F2I      = SiFiveP600FEXQ0;
+def SiFiveP600FloatDiv    : ProcResource<1>;
+
+// Vector pipeline
+// VEXQ0 handles Mask and Simple Slide instructions,
+// VEXQ1 handles Complex Slide, Permutation, Reduction, and Divide instructions.
+// Other vector instructions can be executed on either VEXQ0 or VEXQ1.
+def SiFiveP600VEXQ0        : ProcResource<1>;
+def SiFiveP600VEXQ1        : ProcResource<1>;
+def SiFiveP600VectorArith  : ProcResGroup<[SiFiveP600VEXQ0, SiFiveP600VEXQ1]>;
+
+// Baler has 2 pipelines: one for Load (VLD) and one for Store (VST).
+// NOTE(review): "Baler" is not defined in the visible part of this file;
+// presumably it names the vector unit -- confirm or reword.
+def SiFiveP600VLD          : ProcResource<1>;
+def SiFiveP600VST          : ProcResource<1>;
+def SiFiveP600VDiv         : ProcResource<1>;
+def SiFiveP600VFloatDiv    : ProcResource<1>;
+
+// Every write in this group has a latency of 1 cycle.
+let Latency = 1 in {
+// Integer arithmetic and logic: any pipe in the IntArith group.
+def : WriteRes<WriteIALU, [SiFiveP600IntArith]>;
+def : WriteRes<WriteIALU32, [SiFiveP600IntArith]>;
+def : WriteRes<WriteShiftImm, [SiFiveP600IntArith]>;
+def : WriteRes<WriteShiftImm32, [SiFiveP600IntArith]>;
+def : WriteRes<WriteShiftReg, [SiFiveP600IntArith]>;
+def : WriteRes<WriteShiftReg32, [SiFiveP600IntArith]>;
+// Branching: any pipe in the Branch group.
+def : WriteRes<WriteJmp, [SiFiveP600Branch]>;
+def : WriteRes<WriteJal, [SiFiveP600Branch]>;
+def : WriteRes<WriteJalr, [SiFiveP600Branch]>;
+}
+
+// CMOV
+// PseudoCCMOVGPRNoX0 is modeled as 2 micro-ops with a latency of 2 cycles,
+// consuming both a Branch-group pipe and the CMOV pipe.
+def P600WriteCMOV : SchedWriteRes<[SiFiveP600Branch, SiFiveP600CMOV]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+def : InstRW<[P600WriteCMOV], (instrs PseudoCCMOVGPRNoX0)>;
+
+// Every write in this group has a latency of 3 cycles and uses the MulI2F pipe.
+let Latency = 3 in {
+// Integer multiplication
+def : WriteRes<WriteIMul, [SiFiveP600MulI2F]>;
+def : WriteRes<WriteIMul32, [SiFiveP600MulI2F]>;
+// cpop[w] look exactly like multiply.
+def : WriteRes<WriteCPOP, [SiFiveP600MulI2F]>;
+def : WriteRes<WriteCPOP32, [SiFiveP600MulI2F]>;
+}
+
+// Integer division
+// Divide and remainder each consume the MulI2F pipe and the Div unit.
+// ReleaseAtCycles entries pair positionally with the resource list: the
+// first value applies to MulI2F, the second to Div. The 64-bit forms have a
+// latency of 35 cycles, the 32-bit forms 20 cycles.
+def : WriteRes<WriteIDiv, [SiFiveP600MulI2F, SiFiveP600Div]> {
+  let Latency = 35;
+  let ReleaseAtCycles = [1, 34];
+}
+def : WriteRes<WriteIDiv32,  [SiFiveP600MulI2F, SiFiveP600Div]> {
+  let Latency = 20;
+  let ReleaseAtCycles = [1, 19];
+}
+
+// Integer remainder
+// Same latencies and resource usage as the corresponding division writes.
+def : WriteRes<WriteIRem, [SiFiveP600MulI2F, SiFiveP600Div]> {
+  let Latency = 35;
+  let ReleaseAtCycles = [1, 34];
+}
+def : WriteRes<WriteIRem32, [SiFiveP600MulI2F, SiFiveP600Div]> {
+  let Latency = 20;
+  let ReleaseAtCycles = [1, 19];
+}
+
+let Latency = 1 in {
----------------
preames wrote:

Same

https://github.com/llvm/llvm-project/pull/84962