[clang] [llvm] [RISCV] Add processor definition and scheduling model for XiangShan-KunMingHu (PR #90392)
Yingwei Zheng via cfe-commits
cfe-commits at lists.llvm.org
Sun Apr 28 04:09:32 PDT 2024
郝康达 <hebo at bosc.ac.cn>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/90392 at github.com>
================
@@ -0,0 +1,1489 @@
+//==- RISCVSchedXiangShanKunMingHu.td - XiangShanKunMingHu Scheduling Defs -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// XiangShan is a high-performance open-source RISC-V processor project
+// initiated by the Institute of Computing Technology (ICT), Chinese Academy of
+// Sciences (CAS). KunMingHu is the third generation of the XiangShan
+// architecture, developed by ICT, CAS and the Beijing Institute of Open Source
+// Chip (BOSC), with a focus on achieving higher performance.
+// Source: https://github.com/OpenXiangShan/XiangShan
+// Documentation: https://github.com/OpenXiangShan/XiangShan-doc
+
+//===----------------------------------------------------------------------===//
+// The KunMingHu core supports "RV64IMAFDCV_zba_zbb_zbc_zbs_zbkb_zbkc_zbkx_zknd_zkne_zknh
+// _zksed_zksh_svinval_zicbom_zicboz_zicsr_zifencei".
+// Zvfh is not in that list, so floating-point SEW can only be 32 or 64, never 8 or 16.
+class NoZvfhSchedSEWSet_rm8and16<string mx, bit isF = 0, bit isWidening = 0> {
+  // Start from the generic SEW list for this LMUL. For floating-point
+  // operations (isF set) the 8- and 16-bit entries are dropped; otherwise the
+  // list is passed through unchanged.
+  defvar allSEWs = SchedSEWSet<mx, isF, isWidening>.val;
+  list<int> val = !if(isF, !listremove(allSEWs, [8, 16]), allSEWs);
+}
+
+class NoZvfhSmallestSEW<string mx, bit isF = 0, bit isWidening = 0> {
+  // `r` is the first element of the filtered SEW list.
+  // NOTE(review): this is the "smallest" SEW only if SchedSEWSet yields its
+  // list in ascending order, and it assumes the filtered list is non-empty for
+  // every (mx, isF, isWidening) it is instantiated with - confirm.
+  defvar sews = NoZvfhSchedSEWSet_rm8and16<mx, isF, isWidening>.val;
+  int r = !head(sews);
+}
+
+multiclass NoZvfh_LMULSEWReadAdvanceImpl<string name, int val, list<SchedWrite> writes = [],
+                                         list<string> MxList, bit isF = 0,
+                                         bit isWidening = 0> {
+  // Emits one ReadAdvance for "<name>_WorstCase" and one for every
+  // "<name>_<mx>_E<sew>" combination, skipping any SchedRead that does not
+  // exist. The SEW values come from NoZvfhSchedSEWSet_rm8and16.
+  defvar worstCaseName = name # "_WorstCase";
+  if !exists<SchedRead>(worstCaseName) then
+    def : ReadAdvance<!cast<SchedRead>(worstCaseName), val, writes>;
+
+  foreach mx = MxList in
+    foreach sew = NoZvfhSchedSEWSet_rm8and16<mx, isF, isWidening>.val in {
+      defvar readName = name # "_" # mx # "_E" # sew;
+      if !exists<SchedRead>(readName) then
+        def : ReadAdvance<!cast<SchedRead>(readName), val, writes>;
+    }
+}
+
+// Floating-point (non-widening) ReadAdvance over SchedMxListF, using the SEW
+// set with 8 and 16 removed.
+multiclass LMULSEWReadAdvanceFnoZvfh<string name, int val,
+                                     list<SchedWrite> writes = []>
+    : NoZvfh_LMULSEWReadAdvanceImpl<name, val, writes, SchedMxListF, isF=1>;
+
+// Floating-point widening ReadAdvance over SchedMxListFW, using the SEW set
+// with 8 and 16 removed.
+multiclass LMULSEWReadAdvanceFWnoZvfh<string name, int val,
+                                      list<SchedWrite> writes = []>
+    : NoZvfh_LMULSEWReadAdvanceImpl<name, val, writes, SchedMxListFW,
+                                    isF=1, isWidening=1>;
+
+//===----------------------------------------------------------------------===//
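+// If Zvfhmin and Zvfh are not supported, floating-point SEW can only be 32 or 64.
+// The helper below therefore removes 32 and 64 from the floating-point SEW set,
+// leaving only the SEWs that have to be marked as unsupported.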
+class NoZvfhSchedSEWSet_rm32and64<string mx, bit isF = 0, bit isWidening = 0> {
+  // Start from the generic SEW list for this LMUL. For floating-point
+  // operations (isF set) the 32- and 64-bit entries are dropped; otherwise the
+  // list is passed through unchanged.
+  defvar allSEWs = SchedSEWSet<mx, isF, isWidening>.val;
+  list<int> val = !if(isF, !listremove(allSEWs, [32, 64]), allSEWs);
+}
+
+// Write-Impl
+multiclass NoZvfhLMULSEWWriteResImpl<string name, list<ProcResourceKind> resources,
+                                     list<string> MxList, bit isF = 0,
+                                     bit isWidening = 0> {
+  // Emits a WriteRes for every existing "<name>_<mx>_E<sew>" SchedWrite, with
+  // the SEW values taken from NoZvfhSchedSEWSet_rm32and64.
+  foreach mx = MxList in
+    foreach sew = NoZvfhSchedSEWSet_rm32and64<mx, isF, isWidening>.val in {
+      defvar writeName = name # "_" # mx # "_E" # sew;
+      if !exists<SchedWrite>(writeName) then
+        def : WriteRes<!cast<SchedWrite>(writeName), resources>;
+    }
+}
+// Read-Impl
+multiclass NoZvfhLMULSEWReadAdvanceImpl<string name, int val, list<SchedWrite> writes = [],
+                                        list<string> MxList, bit isF = 0,
+                                        bit isWidening = 0> {
+  // Emits a ReadAdvance for every existing "<name>_<mx>_E<sew>" SchedRead, with
+  // the SEW values taken from NoZvfhSchedSEWSet_rm32and64.
+  foreach mx = MxList in
+    foreach sew = NoZvfhSchedSEWSet_rm32and64<mx, isF, isWidening>.val in {
+      defvar readName = name # "_" # mx # "_E" # sew;
+      if !exists<SchedRead>(readName) then
+        def : ReadAdvance<!cast<SchedRead>(readName), val, writes>;
+    }
+}
+
+// Write
+// Floating-point WriteRes wrapper: iterates SchedMxListF with isF set.
+multiclass NoZvfhLMULSEWWriteResF<string name,
+                                  list<ProcResourceKind> resources>
+    : NoZvfhLMULSEWWriteResImpl<name, resources, SchedMxListF, isF=1>;
+
+// Floating-point widening WriteRes wrapper: iterates SchedMxListFW with isF
+// and isWidening set.
+multiclass NoZvfhLMULSEWWriteResFW<string name,
+                                   list<ProcResourceKind> resources>
+    : NoZvfhLMULSEWWriteResImpl<name, resources, SchedMxListFW,
+                                isF=1, isWidening=1>;
+
+// Floating-point widening-reduction WriteRes wrapper: iterates
+// SchedMxListFWRed with isF and isWidening set.
+multiclass NoZvfhLMULSEWWriteResFWRed<string name,
+                                      list<ProcResourceKind> resources>
+    : NoZvfhLMULSEWWriteResImpl<name, resources, SchedMxListFWRed,
+                                isF=1, isWidening=1>;
+
+// Read
+// Floating-point ReadAdvance wrapper: iterates SchedMxListF with isF set.
+multiclass NoZvfhLMULSEWReadAdvanceF<string name, int val,
+                                     list<SchedWrite> writes = []>
+    : NoZvfhLMULSEWReadAdvanceImpl<name, val, writes, SchedMxListF, isF=1>;
+// Floating-point widening ReadAdvance wrapper: iterates SchedMxListFW with isF
+// and isWidening set.
+multiclass NoZvfhLMULSEWReadAdvanceFW<string name, int val,
+                                      list<SchedWrite> writes = []>
+    : NoZvfhLMULSEWReadAdvanceImpl<name, val, writes, SchedMxListFW,
+                                   isF=1, isWidening=1>;
+
+multiclass UnsupportedSchedZvfh {
+  // Every WriteRes / ReadAdvance instantiated here is marked Unsupported. The
+  // wrappers only expand over the SEWs left after removing 32 and 64 from the
+  // floating-point SEW set. Writes get an empty resource list, reads an
+  // advance of 0.
+  let Unsupported = true in {
+    // Write
+    // 13. Vector Floating-Point Instructions
+    foreach w = ["WriteVFALUV", "WriteVFALUF",
+                 "WriteVFMulV", "WriteVFMulF",
+                 "WriteVFDivV", "WriteVFDivF",
+                 "WriteVFMulAddV", "WriteVFMulAddF",
+                 "WriteVFSqrtV", "WriteVFRecpV",
+                 "WriteVFMinMaxV", "WriteVFMinMaxF",
+                 "WriteVFSgnjV", "WriteVFSgnjF",
+                 "WriteVFCvtIToFV"] in
+      defm "" : NoZvfhLMULSEWWriteResF<w, []>;
+
+    foreach w = ["WriteVFWALUV", "WriteVFWALUF",
+                 "WriteVFWMulV", "WriteVFWMulF",
+                 "WriteVFWMulAddV", "WriteVFWMulAddF",
+                 "WriteVFWCvtFToFV",
+                 "WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
+      defm "" : NoZvfhLMULSEWWriteResFW<w, []>;
+
+    // 14. Vector Reduction Operations
+    foreach w = ["WriteVFRedV_From", "WriteVFRedOV_From",
+                 "WriteVFRedMinMaxV_From"] in
+      defm "" : NoZvfhLMULSEWWriteResF<w, []>;
+
+    foreach w = ["WriteVFWRedV_From", "WriteVFWRedOV_From"] in
+      defm "" : NoZvfhLMULSEWWriteResFWRed<w, []>;
+
+    // Read
+    // 13. Vector Floating-Point Instructions
+    foreach r = ["ReadVFALUV", "ReadVFALUF",
+                 "ReadVFMulV", "ReadVFMulF",
+                 "ReadVFDivV", "ReadVFDivF",
+                 "ReadVFMulAddV", "ReadVFMulAddF",
+                 "ReadVFSqrtV", "ReadVFRecpV",
+                 "ReadVFMinMaxV", "ReadVFMinMaxF",
+                 "ReadVFSgnjV", "ReadVFSgnjF",
+                 "ReadVFCvtIToFV"] in
+      defm "" : NoZvfhLMULSEWReadAdvanceF<r, 0>;
+
+    foreach r = ["ReadVFWALUV", "ReadVFWALUF",
+                 "ReadVFWMulV", "ReadVFWMulF",
+                 "ReadVFWMulAddV", "ReadVFWMulAddF",
+                 "ReadVFWCvtFToFV",
+                 "ReadVFNCvtIToFV", "ReadVFNCvtFToFV"] in
+      defm "" : NoZvfhLMULSEWReadAdvanceFW<r, 0>;
+  } // Unsupported
+} // UnsupportedSchedZvfh
+
+//===----------------------------------------------------------------------===//
+
+class XSGetCyclesVIALU<string mx> {
+  // LMUL scale factor: 1 for M1 and all fractional LMULs, 2/4/8 for M2/M4/M8.
+  defvar scale = !cond(
+    !eq(mx, "M8")  : 8,
+    !eq(mx, "M4")  : 4,
+    !eq(mx, "M2")  : 2,
+    !eq(mx, "M1")  : 1,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // c is 2 at scale 1 and grows linearly with the scale factor.
+  int c = !mul(scale, 2);
+}
+
+class XSGetCyclesVIMAC<string mx> {
+  // LMUL scale factor: 1 for M1 and all fractional LMULs, 2/4/8 for M2/M4/M8.
+  defvar scale = !cond(
+    !eq(mx, "M8")  : 8,
+    !eq(mx, "M4")  : 4,
+    !eq(mx, "M2")  : 2,
+    !eq(mx, "M1")  : 1,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // c is 3 at scale 1 and grows linearly with the scale factor.
+  int c = !mul(scale, 3);
+}
+
+class XSGetCyclesVIDIV<string mx, int sew> {
+  // Reject unexpected element widths up front with a readable message, in the
+  // same way XSGetCyclesVFDIV does, instead of relying on the SEW table below
+  // failing to match.
+  assert !or(!eq(sew, 8), !eq(sew, 16), !eq(sew, 32), !eq(sew, 64)),
+         "Integer SEW of KunMingHu can only be 8, 16, 32 or 64.";
+  // Multiplier per LMUL: 1 for M1 and all fractional LMULs, 2/4/8 for
+  // M2/M4/M8.
+  int uop = !cond(
+    !eq(mx, "M1") : 1,
+    !eq(mx, "M2") : 2,
+    !eq(mx, "M4") : 4,
+    !eq(mx, "M8") : 8,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // Per-SEW cycle count; where a range is quoted, the upper bound is used.
+  int cycles = !cond(
+    !eq(sew, 64) : 19, // I64: 4-19
+    !eq(sew, 32) : 11, // I32: 4-11
+    !eq(sew, 16) : 7, // I16: 4-7
+    !eq(sew, 8) : 6 // I8: 6
+  );
+  // Total is the per-SEW cycle count scaled by the LMUL multiplier.
+  int c = !mul(uop, cycles);
+}
+
+class XSGetCyclesVIPU<string mx> {
+  // LMUL scale factor: 1 for M1 and all fractional LMULs, 2/4/8 for M2/M4/M8.
+  defvar scale = !cond(
+    !eq(mx, "M8")  : 8,
+    !eq(mx, "M4")  : 4,
+    !eq(mx, "M2")  : 2,
+    !eq(mx, "M1")  : 1,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // c is 2 at scale 1 and grows linearly with the scale factor.
+  int c = !mul(scale, 2);
+}
+
+class XSGetCyclesVPPU<string mx> {
+  // LMUL scale factor: 1 for M1 and all fractional LMULs, 2/4/8 for M2/M4/M8.
+  defvar scale = !cond(
+    !eq(mx, "M8")  : 8,
+    !eq(mx, "M4")  : 4,
+    !eq(mx, "M2")  : 2,
+    !eq(mx, "M1")  : 1,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // c is 2 at scale 1 and grows linearly with the scale factor.
+  int c = !mul(scale, 2);
+}
+
+class XSGetCyclesVFALU<string mx> {
+  // LMUL scale factor: 1 for M1 and all fractional LMULs, 2/4/8 for M2/M4/M8.
+  defvar scale = !cond(
+    !eq(mx, "M8")  : 8,
+    !eq(mx, "M4")  : 4,
+    !eq(mx, "M2")  : 2,
+    !eq(mx, "M1")  : 1,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // c is 2 at scale 1 and grows linearly with the scale factor.
+  int c = !mul(scale, 2);
+}
+
+class XSGetCyclesVFMA<string mx> {
+  // LMUL scale factor: 1 for M1 and all fractional LMULs, 2/4/8 for M2/M4/M8.
+  defvar scale = !cond(
+    !eq(mx, "M8")  : 8,
+    !eq(mx, "M4")  : 4,
+    !eq(mx, "M2")  : 2,
+    !eq(mx, "M1")  : 1,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // c is 4 at scale 1 and grows linearly with the scale factor.
+  int c = !mul(scale, 4);
+}
+
+class XSGetCyclesVFDIV<string mx, int sew> {
+  // Only 32- and 64-bit floating-point elements are accepted.
+  assert !or(!eq(sew, 32), !eq(sew, 64)), "Floating-point SEW of KunMingHu can only be 32 or 64.";
+  // Multiplier per LMUL: 1 for M1 and all fractional LMULs, 2/4/8 for
+  // M2/M4/M8.
+  int uop = !cond(
+    !eq(mx, "M8")  : 8,
+    !eq(mx, "M4")  : 4,
+    !eq(mx, "M2")  : 2,
+    !eq(mx, "M1")  : 1,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // Per-SEW cycle count.
+  int cycles = !cond(
+    !eq(sew, 32) : 10, // FP32: 10
+    !eq(sew, 64) : 15  // FP64: 15
+  );
+  // Total is the per-SEW cycle count scaled by the LMUL multiplier.
+  int c = !mul(uop, cycles);
+}
+
+class XSGetCyclesVFCVT<string mx> {
+  // LMUL scale factor: 1 for M1 and all fractional LMULs, 2/4/8 for M2/M4/M8.
+  defvar scale = !cond(
+    !eq(mx, "M8")  : 8,
+    !eq(mx, "M4")  : 4,
+    !eq(mx, "M2")  : 2,
+    !eq(mx, "M1")  : 1,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // c is 3 at scale 1 and grows linearly with the scale factor.
+  int c = !mul(scale, 3);
+}
+
+class XSGetCyclesVLDU<string mx> {
+  // LMUL scale factor: 1 for M1 and all fractional LMULs, 2/4/8 for M2/M4/M8.
+  defvar scale = !cond(
+    !eq(mx, "M8")  : 8,
+    !eq(mx, "M4")  : 4,
+    !eq(mx, "M2")  : 2,
+    !eq(mx, "M1")  : 1,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // c is 8 at scale 1 and grows linearly with the scale factor.
+  int c = !mul(scale, 8);
+}
+
+class XSGetCyclesVSTU<string mx> {
+  // LMUL scale factor: 1 for M1 and all fractional LMULs, 2/4/8 for M2/M4/M8.
+  defvar scale = !cond(
+    !eq(mx, "M8")  : 8,
+    !eq(mx, "M4")  : 4,
+    !eq(mx, "M2")  : 2,
+    !eq(mx, "M1")  : 1,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // c is 7 at scale 1 and grows linearly with the scale factor.
+  int c = !mul(scale, 7);
+}
+
+// If mx is the maximum LMUL in the MxList, then c is true, indicating the worst case.
+class XSIsWorstCaseMX<string mx, list<string> MxList> {
+  // c is set exactly when mx equals the value LargestLMUL reports for MxList.
+  bit c = !eq(mx, LargestLMUL<MxList>.r);
+}
+
+// If mx is the maximum LMUL in the MxList, and sew is the minimum value
+// when LMUL=mx, then c is true, indicating the worst case.
+class XSIsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
+                         bit isF = 0> {
+  // mx must match the value LargestLMUL reports for MxList ...
+  defvar isLargestLMUL = !eq(mx, LargestLMUL<MxList>.r);
+  // ... and sew must match the value NoZvfhSmallestSEW reports for (mx, isF).
+  defvar isSmallestSEW = !eq(sew, NoZvfhSmallestSEW<mx, isF>.r);
+  bit c = !and(isLargestLMUL, isSmallestSEW);
+}
+
+// ReadAdvance of `cycles` (default 2) for `read`, applied to the scalar load
+// and atomic writes listed below.
+class XSLDUtoAnyBypass<SchedRead read, int cycles = 2>
+    : ReadAdvance<read, cycles,
+                  [WriteLDB, WriteLDH, WriteLDW, WriteLDD,
+                   WriteAtomicW, WriteAtomicD,
+                   WriteAtomicLDW, WriteAtomicLDD]>;
+
+//===----------------------------------------------------------------------===//
+
+def XiangShanKunMingHuModel : SchedMachineModel {
+  // Pipeline width and buffering.
+  let IssueWidth = 6;             // 6-way decode and dispatch
+  let MicroOpBufferSize = 256;
+  let LoopMicroOpBufferSize = 48; // Instruction queue size
+
+  // Latencies and penalties.
+  let LoadLatency = 6;
+  let MispredictPenalty = 13;     // Based on estimate of pipeline depth.
+
+  // Scheduler configuration.
+  let PostRAScheduler = 1;
+  let CompleteModel = 0;
+  let UnsupportedFeatures = [HasStdExtZcmt, HasStdExtZkr];
+}
+
+let SchedModel = XiangShanKunMingHuModel in {
----------------
dtcxzyw wrote:
Can you tell me where the documentation for XiangShan-KunMingHu is? The [documentation](https://xiangshan-doc.readthedocs.io/zh-cn/latest/backend/overview/) is out of sync with your scheduling model.
https://github.com/llvm/llvm-project/pull/90392
More information about the cfe-commits
mailing list