[llvm] [RISCV] Add sched model for XiangShan-NanHu (PR #70232)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 25 10:29:29 PDT 2023


https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/70232

[XiangShan](https://github.com/OpenXiangShan/XiangShan) is an open-source high-performance RISC-V processor.

This PR adds the schedule model for XiangShan-NanHu, the 2nd Gen core of the XiangShan processor series.
Overview: https://xiangshan-doc.readthedocs.io/zh-cn/latest/integration/overview/

It is based on the patch [D122556](https://reviews.llvm.org/D122556) by @SForeKeeper. The original patch has not been updated for a long time and is out of sync with the current RTL design.

According to @poemonsense, ICT-CAS is about to complete the tape-out of the NanHu core, so I am posting this PR to add support for it.

[Move elimination](https://github.com/dtcxzyw/llvm-project/commit/59f6e22bf12f67d799a7777f641853fec76c0aa8#diff-32270ea35a510b9a116a50cf5b922c46c194da7b0e0afe76576b8b2bef06556d) and macro fusions will be supported in subsequent PRs.


>From 46644679be9cf1d2fdb75c70a27aefc1fd1488da Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 26 Oct 2023 00:23:32 +0800
Subject: [PATCH] [RISCV] Add sched model for XiangShan-NanHu

Co-authored-by: SForeKeeper <zkliu6 at gmail.com>
---
 clang/test/Driver/riscv-cpus.c                |  14 +
 clang/test/Misc/target-invalid-cpu-note.c     |   4 +-
 llvm/lib/Target/RISCV/RISCV.td                |   1 +
 llvm/lib/Target/RISCV/RISCVInstrInfoD.td      |   2 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoF.td      |   2 +-
 llvm/lib/Target/RISCV/RISCVProcessors.td      |  21 +
 llvm/lib/Target/RISCV/RISCVSchedRocket.td     |   2 +
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td    |   2 +
 .../Target/RISCV/RISCVSchedSyntacoreSCR1.td   |   2 +
 .../Target/RISCV/RISCVSchedXiangShanNanHu.td  | 307 ++++++++++
 llvm/lib/Target/RISCV/RISCVSchedule.td        |   2 +
 .../llvm-mca/RISCV/XiangShan/cascade-fma.s    |  53 ++
 .../llvm-mca/RISCV/XiangShan/gpr-bypass.s     | 527 ++++++++++++++++++
 .../llvm-mca/RISCV/XiangShan/load-to-alu.s    |  73 +++
 14 files changed, 1008 insertions(+), 4 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
 create mode 100644 llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s
 create mode 100644 llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s
 create mode 100644 llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s

diff --git a/clang/test/Driver/riscv-cpus.c b/clang/test/Driver/riscv-cpus.c
index 3eaceedce685fc6..70f0a63336bd478 100644
--- a/clang/test/Driver/riscv-cpus.c
+++ b/clang/test/Driver/riscv-cpus.c
@@ -20,6 +20,17 @@
 // MCPU-SYNTACORE-SCR1-MAX: "-target-feature" "+zicsr" "-target-feature" "+zifencei"
 // MCPU-SYNTACORE-SCR1-MAX: "-target-abi" "ilp32"
 
+// RUN: %clang --target=riscv64 -### -c %s 2>&1 -mcpu=xiangshan-nanhu | FileCheck -check-prefix=MCPU-XIANGSHAN-NANHU %s
+// MCPU-XIANGSHAN-NANHU: "-nostdsysteminc" "-target-cpu" "xiangshan-nanhu"
+// MCPU-XIANGSHAN-NANHU: "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d"
+// MCPU-XIANGSHAN-NANHU: "-target-feature" "+c"
+// MCPU-XIANGSHAN-NANHU: "-target-feature" "+zicbom" "-target-feature" "+zicboz" "-target-feature" "+zicsr" "-target-feature" "+zifencei"
+// MCPU-XIANGSHAN-NANHU: "-target-feature" "+zba" "-target-feature" "+zbb" "-target-feature" "+zbc"
+// MCPU-XIANGSHAN-NANHU: "-target-feature" "+zbkb" "-target-feature" "+zbkc" "-target-feature" "+zbkx" "-target-feature" "+zbs"
+// MCPU-XIANGSHAN-NANHU: "-target-feature" "+zkn" "-target-feature" "+zknd" "-target-feature" "+zkne" "-target-feature" "+zknh"
+// MCPU-XIANGSHAN-NANHU: "-target-feature" "+zks" "-target-feature" "+zksed" "-target-feature" "+zksh" "-target-feature" "+svinval"
+// MCPU-XIANGSHAN-NANHU: "-target-abi" "lp64d"
+
 // We cannot check much for -mcpu=native, but it should be replaced by a valid CPU string.
 // RUN: %clang --target=riscv64 -### -c %s -mcpu=native 2> %t.err || true
 // RUN: FileCheck --input-file=%t.err -check-prefix=MCPU-NATIVE %s
@@ -62,6 +73,9 @@
 // RUN: %clang --target=riscv64 -### -c %s 2>&1 -mtune=veyron-v1 | FileCheck -check-prefix=MTUNE-VEYRON-V1 %s
 // MTUNE-VEYRON-V1: "-tune-cpu" "veyron-v1"
 
+// RUN: %clang --target=riscv64 -### -c %s 2>&1 -mtune=xiangshan-nanhu | FileCheck -check-prefix=MTUNE-XIANGSHAN-NANHU %s
+// MTUNE-XIANGSHAN-NANHU: "-tune-cpu" "xiangshan-nanhu"
+
 // Check mtune alias CPU has resolved to the right CPU according XLEN.
 // RUN: %clang --target=riscv32 -### -c %s 2>&1 -mtune=generic | FileCheck -check-prefix=MTUNE-GENERIC-32 %s
 // MTUNE-GENERIC-32: "-tune-cpu" "generic"
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index b2a04ebdbce628f..8e91eb4c62dd259 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -85,7 +85,7 @@
 
 // RUN: not %clang_cc1 -triple riscv64 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV64
 // RISCV64: error: unknown target CPU 'not-a-cpu'
-// RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280, veyron-v1{{$}}
+// RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280, veyron-v1, xiangshan-nanhu{{$}}
 
 // RUN: not %clang_cc1 -triple riscv32 -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE-RISCV32
 // TUNE-RISCV32: error: unknown target CPU 'not-a-cpu'
@@ -93,4 +93,4 @@
 
 // RUN: not %clang_cc1 -triple riscv64 -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE-RISCV64
 // TUNE-RISCV64: error: unknown target CPU 'not-a-cpu'
-// TUNE-RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280, veyron-v1, generic, rocket, sifive-7-series{{$}}
+// TUNE-RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280, veyron-v1, xiangshan-nanhu, generic, rocket, sifive-7-series{{$}}
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index be93d5933d3329e..cb48ac4eeadd251 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -37,6 +37,7 @@ include "GISel/RISCVRegisterBanks.td"
 include "RISCVSchedRocket.td"
 include "RISCVSchedSiFive7.td"
 include "RISCVSchedSyntacoreSCR1.td"
+include "RISCVSchedXiangShanNanHu.td"
 
 //===----------------------------------------------------------------------===//
 // RISC-V processors supported.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 59312f02aeceb77..34becfafe77473d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -78,7 +78,7 @@ def FSD : FPStore_r<0b011, "fsd", FPR64, WriteFST64>;
 } // Predicates = [HasStdExtD]
 
 foreach Ext = DExts in {
-  let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in {
+  let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64Addend] in {
     defm FMADD_D  : FPFMA_rrr_frm_m<OPC_MADD,  0b01, "fmadd.d",  Ext>;
     defm FMSUB_D  : FPFMA_rrr_frm_m<OPC_MSUB,  0b01, "fmsub.d",  Ext>;
     defm FNMSUB_D : FPFMA_rrr_frm_m<OPC_NMSUB, 0b01, "fnmsub.d", Ext>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 8726245f1602ebf..3a5794bb2d19474 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -302,7 +302,7 @@ def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>;
 } // Predicates = [HasStdExtF]
 
 foreach Ext = FExts in {
-  let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in {
+  let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32Addend] in {
     defm FMADD_S  : FPFMA_rrr_frm_m<OPC_MADD,  0b00, "fmadd.s",  Ext>;
     defm FMSUB_S  : FPFMA_rrr_frm_m<OPC_MSUB,  0b00, "fmsub.s",  Ext>;
     defm FNMSUB_S : FPFMA_rrr_frm_m<OPC_NMSUB, 0b00, "fnmsub.s", Ext>;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index e4008d145ffa572..334e1f3f1d4521a 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -243,3 +243,24 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
                                              FeatureStdExtZicbop,
                                              FeatureStdExtZicboz,
                                              FeatureVendorXVentanaCondOps]>;
+
+def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
+                                          XiangShanNanHuModel,
+                                          [Feature64Bit,
+                                           FeatureStdExtZicsr,
+                                           FeatureStdExtZifencei,
+                                           FeatureStdExtM,
+                                           FeatureStdExtA,
+                                           FeatureStdExtF,
+                                           FeatureStdExtD,
+                                           FeatureStdExtC,
+                                           FeatureStdExtZba,
+                                           FeatureStdExtZbb,
+                                           FeatureStdExtZbc,
+                                           FeatureStdExtZbs,
+                                           FeatureStdExtZkn,
+                                           FeatureStdExtZksed,
+                                           FeatureStdExtZksh,
+                                           FeatureStdExtSvinval,
+                                           FeatureStdExtZicbom,
+                                           FeatureStdExtZicboz]>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 8fbc9afe267c562..bb9dfe5d0124098 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -206,7 +206,9 @@ def : ReadAdvance<ReadFAdd64, 0>;
 def : ReadAdvance<ReadFMul32, 0>;
 def : ReadAdvance<ReadFMul64, 0>;
 def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
 def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
 def : ReadAdvance<ReadFDiv32, 0>;
 def : ReadAdvance<ReadFDiv64, 0>;
 def : ReadAdvance<ReadFSqrt32, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 96ebe8e3e67686a..822dc43d21f8392 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -936,7 +936,9 @@ def : ReadAdvance<ReadFMA16, 0>;
 def : ReadAdvance<ReadFMul32, 0>;
 def : ReadAdvance<ReadFMul64, 0>;
 def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
 def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
 def : ReadAdvance<ReadFDiv16, 0>;
 def : ReadAdvance<ReadFDiv32, 0>;
 def : ReadAdvance<ReadFDiv64, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
index 960258c8bc7dfe8..06ad2075b073614 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
@@ -164,7 +164,9 @@ def : ReadAdvance<ReadFAdd64, 0>;
 def : ReadAdvance<ReadFMul32, 0>;
 def : ReadAdvance<ReadFMul64, 0>;
 def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
 def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
 def : ReadAdvance<ReadFDiv32, 0>;
 def : ReadAdvance<ReadFDiv64, 0>;
 def : ReadAdvance<ReadFSqrt32, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
new file mode 100644
index 000000000000000..da21a311cdf7e00
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -0,0 +1,307 @@
+//==- RISCVSchedXiangShanNanHu.td - XiangShan-NanHu Scheduling Definitions --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-------------------------------------------------------------------------------------===//
+
+//===-------------------------------------------------------------------------------------===//
+
+// XiangShan is a high-performance open-source RISC-V processor developed by
+// the Institute of Computing Technology (ICT) of the Chinese Academy of Sciences.
+// Source: https://github.com/OpenXiangShan/XiangShan
+// Documentation: https://github.com/OpenXiangShan/XiangShan-doc
+
+// XiangShan-NanHu is the second generation of the XiangShan processor series.
+// Overview: https://xiangshan-doc.readthedocs.io/zh-cn/latest/integration/overview/
+
+def XiangShanNanHuModel : SchedMachineModel {
+  let MicroOpBufferSize = 256;
+  let LoopMicroOpBufferSize = 48;  // Instruction queue size
+  let IssueWidth = 6;  // 6-way decode and dispatch
+  let LoadLatency = 4;
+  let MispredictPenalty = 11; // Based on estimate of pipeline depth.
+  let CompleteModel = 0;
+  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
+  let UnsupportedFeatures = [];
+}
+
+let SchedModel = XiangShanNanHuModel in {
+
+// The reservation stations are distributed, organized as smaller 16-entry or 32-entry stations.
+let BufferSize = 16 in {
+  def XS2ALU : ProcResource<4>;
+  def XS2MDU : ProcResource<2>;
+  def XS2MISC : ProcResource<1>;
+
+  def XS2FMAC : ProcResource<4>;
+  def XS2FMISC : ProcResource<2>;
+
+  // Load/Store queues are ignored.
+  def XS2LD : ProcResource<2>;
+  def XS2ST : ProcResource<2>;
+}
+
+// Branching
+def : WriteRes<WriteJmp, [XS2MISC]>;
+def : WriteRes<WriteJal, [XS2MISC]>;
+def : WriteRes<WriteJalr, [XS2MISC]>;
+
+// Integer arithmetic and logic
+let Latency = 1 in {
+def : WriteRes<WriteIALU, [XS2ALU]>;
+def : WriteRes<WriteIALU32, [XS2ALU]>;
+def : WriteRes<WriteShiftImm, [XS2ALU]>;
+def : WriteRes<WriteShiftImm32, [XS2ALU]>;
+def : WriteRes<WriteShiftReg, [XS2ALU]>;
+def : WriteRes<WriteShiftReg32, [XS2ALU]>;
+}
+
+// Integer multiplication
+let Latency = 3 in {
+def : WriteRes<WriteIMul, [XS2MDU]>;
+def : WriteRes<WriteIMul32, [XS2MDU]>;
+}
+
+// Integer division
+// SRT16 algorithm
+let Latency = 20, ReleaseAtCycles = [20] in {
+def : WriteRes<WriteIDiv32, [XS2MDU]>;
+def : WriteRes<WriteIDiv, [XS2MDU]>;
+}
+
+// Zb*
+let Latency = 1 in {
+// Zba
+def : WriteRes<WriteSHXADD, [XS2ALU]>;
+def : WriteRes<WriteSHXADD32, [XS2ALU]>;
+
+// Zbb
+def : WriteRes<WriteRotateImm, [XS2ALU]>;
+def : WriteRes<WriteRotateImm32, [XS2ALU]>;
+def : WriteRes<WriteRotateReg, [XS2ALU]>;
+def : WriteRes<WriteRotateReg32, [XS2ALU]>;
+def : WriteRes<WriteORCB, [XS2ALU]>;
+def : WriteRes<WriteREV8, [XS2ALU]>;
+
+// Zbkb
+def : WriteRes<WriteBREV8, [XS2ALU]>;
+def : WriteRes<WritePACK, [XS2ALU]>;
+def : WriteRes<WritePACK32, [XS2ALU]>;
+def : WriteRes<WriteZIP, [XS2ALU]>;
+}
+
+let Latency = 3 in {
+// Zbb
+def : WriteRes<WriteCLZ, [XS2MDU]>;
+def : WriteRes<WriteCLZ32, [XS2MDU]>;
+def : WriteRes<WriteCTZ, [XS2MDU]>;
+def : WriteRes<WriteCTZ32, [XS2MDU]>;
+def : WriteRes<WriteCPOP, [XS2MDU]>;
+def : WriteRes<WriteCPOP32, [XS2MDU]>;
+
+// Zbs
+def : WriteRes<WriteSingleBit, [XS2MDU]>;
+def : WriteRes<WriteSingleBitImm, [XS2MDU]>;
+def : WriteRes<WriteBEXT, [XS2MDU]>;
+def : WriteRes<WriteBEXTI, [XS2MDU]>;
+
+// Zbkc
+def : WriteRes<WriteCLMUL, [XS2MDU]>;
+
+// Zbkx
+def : WriteRes<WriteXPERM, [XS2MDU]>;
+}
+
+// Memory
+def : WriteRes<WriteSTB, [XS2ST]>;
+def : WriteRes<WriteSTH, [XS2ST]>;
+def : WriteRes<WriteSTW, [XS2ST]>;
+def : WriteRes<WriteSTD, [XS2ST]>;
+def : WriteRes<WriteFST32, [XS2ST]>;
+def : WriteRes<WriteFST64, [XS2ST]>;
+def : WriteRes<WriteAtomicSTW, [XS2ST]>;
+def : WriteRes<WriteAtomicSTD, [XS2ST]>;
+
+let Latency = 5 in {
+def : WriteRes<WriteLDB, [XS2LD]>;
+def : WriteRes<WriteLDH, [XS2LD]>;
+def : WriteRes<WriteLDW, [XS2LD]>;
+def : WriteRes<WriteLDD, [XS2LD]>;
+
+def : WriteRes<WriteAtomicW, [XS2LD]>;
+def : WriteRes<WriteAtomicD, [XS2LD]>;
+def : WriteRes<WriteAtomicLDW, [XS2LD]>;
+def : WriteRes<WriteAtomicLDD, [XS2LD]>;
+
+def : WriteRes<WriteFLD32, [XS2LD]>;
+def : WriteRes<WriteFLD64, [XS2LD]>;
+}
+
+// XiangShan-NanHu uses FuDian FPU instead of Berkeley HardFloat.
+// Documentation: https://github.com/OpenXiangShan/fudian
+
+let Latency = 3 in {
+def : WriteRes<WriteFAdd32, [XS2FMAC]>;
+def : WriteRes<WriteFSGNJ32, [XS2FMAC]>;
+def : WriteRes<WriteFMinMax32, [XS2FMAC]>;
+def : WriteRes<WriteFAdd64, [XS2FMAC]>;
+def : WriteRes<WriteFSGNJ64, [XS2FMAC]>;
+def : WriteRes<WriteFMinMax64, [XS2FMAC]>;
+
+def : WriteRes<WriteFCvtI32ToF32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtI32ToF64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtI64ToF32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtI64ToF64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF32ToI32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF32ToI64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF64ToI32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF64ToI64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF32ToF64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF64ToF32, [XS2FMAC]>;
+
+def : WriteRes<WriteFClass32, [XS2FMAC]>;
+def : WriteRes<WriteFClass64, [XS2FMAC]>;
+def : WriteRes<WriteFCmp32, [XS2FMAC]>;
+def : WriteRes<WriteFCmp64, [XS2FMAC]>;
+def : WriteRes<WriteFMovF32ToI32, [XS2FMAC]>;
+def : WriteRes<WriteFMovI32ToF32, [XS2FMAC]>;
+def : WriteRes<WriteFMovF64ToI64, [XS2FMAC]>;
+def : WriteRes<WriteFMovI64ToF64, [XS2FMAC]>;
+}
+
+// FP multiplication
+let Latency = 3 in {
+def : WriteRes<WriteFMul32, [XS2FMAC]>;
+def : WriteRes<WriteFMul64, [XS2FMAC]>;
+}
+
+let Latency = 5 in {
+def : WriteRes<WriteFMA32, [XS2FMAC]>;
+def : WriteRes<WriteFMA64, [XS2FMAC]>;
+}
+
+// FP division and square root
+def : WriteRes<WriteFDiv32, [XS2FMISC]> {
+    let Latency = 11;
+}
+def : WriteRes<WriteFDiv64, [XS2FMISC]> {
+    let Latency = 18;
+}
+
+def : WriteRes<WriteFSqrt32, [XS2FMISC]> {
+    let Latency = 17;
+}
+def : WriteRes<WriteFSqrt64, [XS2FMISC]> {
+    let Latency = 31;
+}
+
+// Others
+def : WriteRes<WriteCSR, [XS2MISC]>;
+def : WriteRes<WriteNop, []>;
+
+def : InstRW<[WriteIALU], (instrs COPY)>;
+
+// Bypass and advance
+
+class XS2LoadToALUBypass<SchedRead read>
+    : ReadAdvance<read, 1, [WriteLDB, WriteLDH, WriteLDW, WriteLDD, WriteAtomicW, WriteAtomicD, WriteAtomicLDW, WriteAtomicLDD]>;
+
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : XS2LoadToALUBypass<ReadIALU>;
+def : XS2LoadToALUBypass<ReadIALU32>;
+def : XS2LoadToALUBypass<ReadShiftImm>;
+def : XS2LoadToALUBypass<ReadShiftImm32>;
+def : XS2LoadToALUBypass<ReadShiftReg>;
+def : XS2LoadToALUBypass<ReadShiftReg32>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFStoreData, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
+def : ReadAdvance<ReadFAdd32, 0>;
+def : ReadAdvance<ReadFAdd64, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 2>; // Cascade FMA
+def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 2>; // Cascade FMA
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+
+// Zb*
+// Zba
+def : XS2LoadToALUBypass<ReadSHXADD>;
+def : XS2LoadToALUBypass<ReadSHXADD32>;
+// Zbb
+def : XS2LoadToALUBypass<ReadRotateImm>;
+def : XS2LoadToALUBypass<ReadRotateImm32>;
+def : XS2LoadToALUBypass<ReadRotateReg>;
+def : XS2LoadToALUBypass<ReadRotateReg32>;
+def : ReadAdvance<ReadCLZ, 0>;
+def : ReadAdvance<ReadCLZ32, 0>;
+def : ReadAdvance<ReadCTZ, 0>;
+def : ReadAdvance<ReadCTZ32, 0>;
+def : ReadAdvance<ReadCPOP, 0>;
+def : ReadAdvance<ReadCPOP32, 0>;
+def : XS2LoadToALUBypass<ReadORCB>;
+def : XS2LoadToALUBypass<ReadREV8>;
+// Zbkc
+def : ReadAdvance<ReadCLMUL, 0>;
+// Zbs
+def : ReadAdvance<ReadSingleBit, 0>;
+def : ReadAdvance<ReadSingleBitImm, 0>;
+// Zbkb
+def : XS2LoadToALUBypass<ReadBREV8>;
+def : XS2LoadToALUBypass<ReadPACK>;
+def : XS2LoadToALUBypass<ReadPACK32>;
+def : XS2LoadToALUBypass<ReadZIP>;
+// Zbkx
+def : ReadAdvance<ReadXPERM, 0>;
+
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedV;
+defm : UnsupportedSchedZfa;
+defm : UnsupportedSchedZfh;
+defm : UnsupportedSchedSFB;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index af318ea5bf6851a..e42d07a4a4cd3f0 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -151,7 +151,9 @@ def ReadFMul32      : SchedRead;    // 32-bit floating point multiply
 def ReadFMul64      : SchedRead;    // 64-bit floating point multiply
 def ReadFMA16       : SchedRead;    // 16-bit floating point fused multiply-add
 def ReadFMA32       : SchedRead;    // 32-bit floating point fused multiply-add
+def ReadFMA32Addend : SchedRead;    // 32-bit floating point fused multiply-add (addend operand)
 def ReadFMA64       : SchedRead;    // 64-bit floating point fused multiply-add
+def ReadFMA64Addend : SchedRead;    // 64-bit floating point fused multiply-add (addend operand)
 def ReadFDiv16      : SchedRead;    // 16-bit floating point divide
 def ReadFDiv32      : SchedRead;    // 32-bit floating point divide
 def ReadFDiv64      : SchedRead;    // 64-bit floating point divide
diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s
new file mode 100644
index 000000000000000..d44eb55ebf7598b
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s
@@ -0,0 +1,53 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu < %s | FileCheck %s
+
+# Test XiangShan FuDian's cascade FMA, CPI = 3
+fmadd.s fa0, fa1, fa2, fa0
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      100
+# CHECK-NEXT: Total Cycles:      305
+# CHECK-NEXT: Total uOps:        100
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    0.33
+# CHECK-NEXT: IPC:               0.33
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      5     0.25                        fmadd.s	fa0, fa1, fa2, fa0
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - XS2ALU
+# CHECK-NEXT: [0.1] - XS2ALU
+# CHECK-NEXT: [0.2] - XS2ALU
+# CHECK-NEXT: [0.3] - XS2ALU
+# CHECK-NEXT: [1.0] - XS2FMAC
+# CHECK-NEXT: [1.1] - XS2FMAC
+# CHECK-NEXT: [1.2] - XS2FMAC
+# CHECK-NEXT: [1.3] - XS2FMAC
+# CHECK-NEXT: [2.0] - XS2FMISC
+# CHECK-NEXT: [2.1] - XS2FMISC
+# CHECK-NEXT: [3.0] - XS2LD
+# CHECK-NEXT: [3.1] - XS2LD
+# CHECK-NEXT: [4.0] - XS2MDU
+# CHECK-NEXT: [4.1] - XS2MDU
+# CHECK-NEXT: [5]   - XS2MISC
+# CHECK-NEXT: [6.0] - XS2ST
+# CHECK-NEXT: [6.1] - XS2ST
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]
+# CHECK-NEXT:  -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]  Instructions:
+# CHECK-NEXT:  -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -     fmadd.s	fa0, fa1, fa2, fa0
diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s
new file mode 100644
index 000000000000000..677fece1535a042
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s
@@ -0,0 +1,527 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu -timeline \
+# RUN:   -timeline-max-cycles=1000 -iterations=1 < %s | FileCheck %s
+
+lui a0, 1
+auipc a1, 1
+add a0, a0, a1
+addi a0, a0, 1
+addw a0, a0, a0
+addiw a0, a0, 1
+sub a0, a0, a0
+subw a0, a0, a0
+and a0, a0, a0
+andi a0, a0, 1
+or a0, a0, a0
+ori a0, a0, 1
+xor a0, a0, a0
+xori a0, a0, 1
+sll a0, a0, a0
+slli a0, a0, 1
+sllw a0, a0, a0
+slliw a0, a0, 1
+srl a0, a0, a0
+srli a0, a0, 1
+srlw a0, a0, a0
+srliw a0, a0, 1
+sra a0, a0, a0
+srai a0, a0, 1
+sraw a0, a0, a0
+sraiw a0, a0, 1
+slt a0, a0, a0
+slti a0, a0, 1
+sltu a0, a0, a0
+sltiu a0, a0, 1
+mul a0, a0, a0
+add a0, a0, a0
+mulw a0, a0, a0
+add a0, a0, a0
+beq a0, a0, 1f
+1:
+add a0, a0, a0
+bne a0, a0, 1f
+1:
+add a0, a0, a0
+blt a0, a0, 1f
+1:
+add a0, a0, a0
+bltu a0, a0, 1f
+1:
+add a0, a0, a0
+bge a0, a0, 1f
+1:
+add a0, a0, a0
+bgeu a0, a0, 1f
+1:
+# zba
+add.uw a0, a0, a0
+slli.uw a0, a0, 1
+sh1add.uw a0, a0, a0
+sh2add.uw a0, a0, a0
+sh3add.uw a0, a0, a0
+sh1add a0, a0, a0
+sh2add a0, a0, a0
+sh3add a0, a0, a0
+# zbb
+andn a0, a0, a0
+orn a0, a0, a0
+xnor a0, a0, a0
+sext.b a0, a0
+sext.h a0, a0
+zext.h a0, a0
+min a0, a0, a0
+minu a0, a0, a0
+max a0, a0, a0
+maxu a0, a0, a0
+rol a0, a0, a0
+ror a0, a0, a0
+rori a0, a0, 1
+clz a0, a0
+clzw a0, a0
+ctz a0, a0
+ctzw a0, a0
+cpop a0, a0
+add a0, a0, a0
+cpopw a0, a0
+add a0, a0, a0
+rev8 a0, a0
+orc.b a0, a0
+lb a0, 0(a0)
+add a0, a0, a0
+lh a0, 0(a0)
+and a0, a0, a0
+lw a0, 0(a0)
+or a0, a0, a0
+ld a0, 0(a0)
+xor a0, a0, a0
+lbu a0, 0(a0)
+addi a0, a0, 1
+lhu a0, 0(a0)
+sub a0, a0, a0
+lwu a0, 0(a0)
+addw a0, a0, a0
+jr a0
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      91
+# CHECK-NEXT: Total Cycles:      124
+# CHECK-NEXT: Total uOps:        91
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    0.73
+# CHECK-NEXT: IPC:               0.73
+# CHECK-NEXT: Block RThroughput: 17.3
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.25                        lui	a0, 1
+# CHECK-NEXT:  1      1     0.25                        auipc	a1, 1
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a1
+# CHECK-NEXT:  1      1     0.25                        addi	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        addw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        addiw	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sub	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        subw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        and	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        andi	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        or	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        ori	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        xor	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        xori	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sll	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        slli	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sllw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        slliw	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        srl	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        srli	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        srlw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        srliw	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sra	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        srai	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sraw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sraiw	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        slt	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        slti	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sltu	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        seqz	a0, a0
+# CHECK-NEXT:  1      3     0.50                        mul	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      3     0.50                        mulw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        beq	a0, a0, .Ltmp0
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        bne	a0, a0, .Ltmp1
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        blt	a0, a0, .Ltmp2
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        bltu	a0, a0, .Ltmp3
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        bge	a0, a0, .Ltmp4
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        bgeu	a0, a0, .Ltmp5
+# CHECK-NEXT:  1      1     0.25                        add.uw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        slli.uw	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sh1add.uw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sh2add.uw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sh3add.uw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sh1add	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sh2add	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sh3add	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        andn	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        orn	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        xnor	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sext.b	a0, a0
+# CHECK-NEXT:  1      1     0.25                        sext.h	a0, a0
+# CHECK-NEXT:  1      1     0.25                        zext.h	a0, a0
+# CHECK-NEXT:  1      1     0.25                        min	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        minu	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        max	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        maxu	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        rol	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        ror	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        rori	a0, a0, 1
+# CHECK-NEXT:  1      3     0.50                        clz	a0, a0
+# CHECK-NEXT:  1      3     0.50                        clzw	a0, a0
+# CHECK-NEXT:  1      3     0.50                        ctz	a0, a0
+# CHECK-NEXT:  1      3     0.50                        ctzw	a0, a0
+# CHECK-NEXT:  1      3     0.50                        cpop	a0, a0
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      3     0.50                        cpopw	a0, a0
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        rev8	a0, a0
+# CHECK-NEXT:  1      1     0.25                        orc.b	a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   lb	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   lh	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        and	a0, a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   lw	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        or	a0, a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   ld	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        xor	a0, a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   lbu	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        addi	a0, a0, 1
+# CHECK-NEXT:  1      5     0.50    *                   lhu	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        sub	a0, a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   lwu	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        addw	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        jr	a0
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - XS2ALU
+# CHECK-NEXT: [0.1] - XS2ALU
+# CHECK-NEXT: [0.2] - XS2ALU
+# CHECK-NEXT: [0.3] - XS2ALU
+# CHECK-NEXT: [1.0] - XS2FMAC
+# CHECK-NEXT: [1.1] - XS2FMAC
+# CHECK-NEXT: [1.2] - XS2FMAC
+# CHECK-NEXT: [1.3] - XS2FMAC
+# CHECK-NEXT: [2.0] - XS2FMISC
+# CHECK-NEXT: [2.1] - XS2FMISC
+# CHECK-NEXT: [3.0] - XS2LD
+# CHECK-NEXT: [3.1] - XS2LD
+# CHECK-NEXT: [4.0] - XS2MDU
+# CHECK-NEXT: [4.1] - XS2MDU
+# CHECK-NEXT: [5]   - XS2MISC
+# CHECK-NEXT: [6.0] - XS2ST
+# CHECK-NEXT: [6.1] - XS2ST
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]
+# CHECK-NEXT: 17.00  17.00  17.00  18.00   -      -      -      -      -      -     3.00   4.00   4.00   4.00   7.00    -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]  Instructions:
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     lui	a0, 1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     auipc	a1, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a1
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addi	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     addw	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     addiw	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sub	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     subw	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     and	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     andi	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     or	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     ori	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     xor	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     xori	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sll	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     slli	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sllw	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     slliw	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     srl	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     srli	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     srlw	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     srliw	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sra	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     srai	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sraw	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     sraiw	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     slt	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     slti	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sltu	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     seqz	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -     mul	a0, a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     mulw	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     beq	a0, a0, .Ltmp0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     bne	a0, a0, .Ltmp1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     blt	a0, a0, .Ltmp2
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     bltu	a0, a0, .Ltmp3
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     bge	a0, a0, .Ltmp4
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     bgeu	a0, a0, .Ltmp5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     add.uw	a0, a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     slli.uw	a0, a0, 1
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sh1add.uw	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sh2add.uw	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     sh3add.uw	a0, a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sh1add	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sh2add	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sh3add	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     andn	a0, a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     orn	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     xnor	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sext.b	a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     sext.h	a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     zext.h	a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     min	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     minu	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     max	a0, a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     maxu	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     rol	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     ror	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     rori	a0, a0, 1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -     clz	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     clzw	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -     ctz	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     ctzw	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -     cpop	a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     cpopw	a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     rev8	a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     orc.b	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     lb	a0, 0(a0)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -      -     lh	a0, 0(a0)
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     and	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     lw	a0, 0(a0)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     or	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -      -     ld	a0, 0(a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     xor	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     lbu	a0, 0(a0)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addi	a0, a0, 1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -      -     lhu	a0, 0(a0)
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sub	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     lwu	a0, 0(a0)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     addw	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     jr	a0
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0123456789          0123456789          0123456789          0123456789          0123456789          0123456789
+# CHECK-NEXT: Index     0123456789          0123456789          0123456789          0123456789          0123456789          0123456789          0123
+
+# CHECK:      [0,0]     DeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   lui	a0, 1
+# CHECK-NEXT: [0,1]     DeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   auipc	a1, 1
+# CHECK-NEXT: [0,2]     D=eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a1
+# CHECK-NEXT: [0,3]     D==eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   addi	a0, a0, 1
+# CHECK-NEXT: [0,4]     D===eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   addw	a0, a0, a0
+# CHECK-NEXT: [0,5]     D====eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   addiw	a0, a0, 1
+# CHECK-NEXT: [0,6]     .D====eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sub	a0, a0, a0
+# CHECK-NEXT: [0,7]     .D=====eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   subw	a0, a0, a0
+# CHECK-NEXT: [0,8]     .D======eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   and	a0, a0, a0
+# CHECK-NEXT: [0,9]     .D=======eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   andi	a0, a0, 1
+# CHECK-NEXT: [0,10]    .D========eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   or	a0, a0, a0
+# CHECK-NEXT: [0,11]    .D=========eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   ori	a0, a0, 1
+# CHECK-NEXT: [0,12]    . D=========eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   xor	a0, a0, a0
+# CHECK-NEXT: [0,13]    . D==========eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   xori	a0, a0, 1
+# CHECK-NEXT: [0,14]    . D===========eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sll	a0, a0, a0
+# CHECK-NEXT: [0,15]    . D============eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   slli	a0, a0, 1
+# CHECK-NEXT: [0,16]    . D=============eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sllw	a0, a0, a0
+# CHECK-NEXT: [0,17]    . D==============eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   slliw	a0, a0, 1
+# CHECK-NEXT: [0,18]    .  D==============eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   srl	a0, a0, a0
+# CHECK-NEXT: [0,19]    .  D===============eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   srli	a0, a0, 1
+# CHECK-NEXT: [0,20]    .   D===============eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   srlw	a0, a0, a0
+# CHECK-NEXT: [0,21]    .    D===============eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   srliw	a0, a0, 1
+# CHECK-NEXT: [0,22]    .    .D===============eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sra	a0, a0, a0
+# CHECK-NEXT: [0,23]    .    . D===============eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   srai	a0, a0, 1
+# CHECK-NEXT: [0,24]    .    .  D===============eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sraw	a0, a0, a0
+# CHECK-NEXT: [0,25]    .    .   D===============eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sraiw	a0, a0, 1
+# CHECK-NEXT: [0,26]    .    .    D===============eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   slt	a0, a0, a0
+# CHECK-NEXT: [0,27]    .    .    .D===============eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   slti	a0, a0, 1
+# CHECK-NEXT: [0,28]    .    .    . D===============eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sltu	a0, a0, a0
+# CHECK-NEXT: [0,29]    .    .    .  D===============eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   seqz	a0, a0
+# CHECK-NEXT: [0,30]    .    .    .  D================eeeER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   mul	a0, a0, a0
+# CHECK-NEXT: [0,31]    .    .    .   D==================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,32]    .    .    .   D===================eeeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   mulw	a0, a0, a0
+# CHECK-NEXT: [0,33]    .    .    .    D=====================eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,34]    .    .    .    D======================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   beq	a0, a0, .Ltmp0
+# CHECK-NEXT: [0,35]    .    .    .    .D=====================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,36]    .    .    .    .D======================eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   bne	a0, a0, .Ltmp1
+# CHECK-NEXT: [0,37]    .    .    .    . D=====================eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,38]    .    .    .    . D======================eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   blt	a0, a0, .Ltmp2
+# CHECK-NEXT: [0,39]    .    .    .    .  D=====================eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,40]    .    .    .    .  D======================eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   bltu	a0, a0, .Ltmp3
+# CHECK-NEXT: [0,41]    .    .    .    .   D=====================eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,42]    .    .    .    .   D======================eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   bge	a0, a0, .Ltmp4
+# CHECK-NEXT: [0,43]    .    .    .    .    D=====================eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,44]    .    .    .    .    D======================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   bgeu	a0, a0, .Ltmp5
+# CHECK-NEXT: [0,45]    .    .    .    .    .D=====================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add.uw	a0, a0, a0
+# CHECK-NEXT: [0,46]    .    .    .    .    . D=====================eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   slli.uw	a0, a0, 1
+# CHECK-NEXT: [0,47]    .    .    .    .    .  D=====================eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh1add.uw	a0, a0, a0
+# CHECK-NEXT: [0,48]    .    .    .    .    .   D=====================eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh2add.uw	a0, a0, a0
+# CHECK-NEXT: [0,49]    .    .    .    .    .    D=====================eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh3add.uw	a0, a0, a0
+# CHECK-NEXT: [0,50]    .    .    .    .    .    .D=====================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh1add	a0, a0, a0
+# CHECK-NEXT: [0,51]    .    .    .    .    .    . D=====================eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh2add	a0, a0, a0
+# CHECK-NEXT: [0,52]    .    .    .    .    .    .  D=====================eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh3add	a0, a0, a0
+# CHECK-NEXT: [0,53]    .    .    .    .    .    .   D=====================eER .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   andn	a0, a0, a0
+# CHECK-NEXT: [0,54]    .    .    .    .    .    .    .  D==================eER.    .    .    .    .    .    .    .    .    .    .    .    .    .  .   orn	a0, a0, a0
+# CHECK-NEXT: [0,55]    .    .    .    .    .    .    .    . D===============eER    .    .    .    .    .    .    .    .    .    .    .    .    .  .   xnor	a0, a0, a0
+# CHECK-NEXT: [0,56]    .    .    .    .    .    .    .    .  D===============eER   .    .    .    .    .    .    .    .    .    .    .    .    .  .   sext.b	a0, a0
+# CHECK-NEXT: [0,57]    .    .    .    .    .    .    .    .   D===============eER  .    .    .    .    .    .    .    .    .    .    .    .    .  .   sext.h	a0, a0
+# CHECK-NEXT: [0,58]    .    .    .    .    .    .    .    .    D===============eER .    .    .    .    .    .    .    .    .    .    .    .    .  .   zext.h	a0, a0
+# CHECK-NEXT: [0,59]    .    .    .    .    .    .    .    .    .D===============eER.    .    .    .    .    .    .    .    .    .    .    .    .  .   min	a0, a0, a0
+# CHECK-NEXT: [0,60]    .    .    .    .    .    .    .    .    . D===============eER    .    .    .    .    .    .    .    .    .    .    .    .  .   minu	a0, a0, a0
+# CHECK-NEXT: [0,61]    .    .    .    .    .    .    .    .    .  D===============eER   .    .    .    .    .    .    .    .    .    .    .    .  .   max	a0, a0, a0
+# CHECK-NEXT: [0,62]    .    .    .    .    .    .    .    .    .   D===============eER  .    .    .    .    .    .    .    .    .    .    .    .  .   maxu	a0, a0, a0
+# CHECK-NEXT: [0,63]    .    .    .    .    .    .    .    .    .    D===============eER .    .    .    .    .    .    .    .    .    .    .    .  .   rol	a0, a0, a0
+# CHECK-NEXT: [0,64]    .    .    .    .    .    .    .    .    .    .D===============eER.    .    .    .    .    .    .    .    .    .    .    .  .   ror	a0, a0, a0
+# CHECK-NEXT: [0,65]    .    .    .    .    .    .    .    .    .    . D===============eER    .    .    .    .    .    .    .    .    .    .    .  .   rori	a0, a0, 1
+# CHECK-NEXT: [0,66]    .    .    .    .    .    .    .    .    .    . D================eeeER .    .    .    .    .    .    .    .    .    .    .  .   clz	a0, a0
+# CHECK-NEXT: [0,67]    .    .    .    .    .    .    .    .    .    . D===================eeeER   .    .    .    .    .    .    .    .    .    .  .   clzw	a0, a0
+# CHECK-NEXT: [0,68]    .    .    .    .    .    .    .    .    .    . D======================eeeER.    .    .    .    .    .    .    .    .    .  .   ctz	a0, a0
+# CHECK-NEXT: [0,69]    .    .    .    .    .    .    .    .    .    . D=========================eeeER  .    .    .    .    .    .    .    .    .  .   ctzw	a0, a0
+# CHECK-NEXT: [0,70]    .    .    .    .    .    .    .    .    .    . D============================eeeER    .    .    .    .    .    .    .    .  .   cpop	a0, a0
+# CHECK-NEXT: [0,71]    .    .    .    .    .    .    .    .    .    .  D==============================eER   .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,72]    .    .    .    .    .    .    .    .    .    .  D===============================eeeER.    .    .    .    .    .    .    .  .   cpopw	a0, a0
+# CHECK-NEXT: [0,73]    .    .    .    .    .    .    .    .    .    .   D=================================eER    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,74]    .    .    .    .    .    .    .    .    .    .    D=================================eER   .    .    .    .    .    .    .  .   rev8	a0, a0
+# CHECK-NEXT: [0,75]    .    .    .    .    .    .    .    .    .    .    .D=================================eER  .    .    .    .    .    .    .  .   orc.b	a0, a0
+# CHECK-NEXT: [0,76]    .    .    .    .    .    .    .    .    .    .    .D==================================eeeeeER  .    .    .    .    .    .  .   lb	a0, 0(a0)
+# CHECK-NEXT: [0,77]    .    .    .    .    .    .    .    .    .    .    . D=====================================eER  .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,78]    .    .    .    .    .    .    .    .    .    .    . D======================================eeeeeER  .    .    .    .    .  .   lh	a0, 0(a0)
+# CHECK-NEXT: [0,79]    .    .    .    .    .    .    .    .    .    .    .  D=========================================eER  .    .    .    .    .  .   and	a0, a0, a0
+# CHECK-NEXT: [0,80]    .    .    .    .    .    .    .    .    .    .    .  D==========================================eeeeeER  .    .    .    .  .   lw	a0, 0(a0)
+# CHECK-NEXT: [0,81]    .    .    .    .    .    .    .    .    .    .    .   D=============================================eER  .    .    .    .  .   or	a0, a0, a0
+# CHECK-NEXT: [0,82]    .    .    .    .    .    .    .    .    .    .    .   D==============================================eeeeeER  .    .    .  .   ld	a0, 0(a0)
+# CHECK-NEXT: [0,83]    .    .    .    .    .    .    .    .    .    .    .    D=================================================eER  .    .    .  .   xor	a0, a0, a0
+# CHECK-NEXT: [0,84]    .    .    .    .    .    .    .    .    .    .    .    D==================================================eeeeeER  .    .  .   lbu	a0, 0(a0)
+# CHECK-NEXT: [0,85]    .    .    .    .    .    .    .    .    .    .    .    .D=====================================================eER  .    .  .   addi	a0, a0, 1
+# CHECK-NEXT: [0,86]    .    .    .    .    .    .    .    .    .    .    .    .D======================================================eeeeeER  .  .   lhu	a0, 0(a0)
+# CHECK-NEXT: [0,87]    .    .    .    .    .    .    .    .    .    .    .    . D=========================================================eER  .  .   sub	a0, a0, a0
+# CHECK-NEXT: [0,88]    .    .    .    .    .    .    .    .    .    .    .    . D==========================================================eeeeeER.   lwu	a0, 0(a0)
+# CHECK-NEXT: [0,89]    .    .    .    .    .    .    .    .    .    .    .    .  D=============================================================eER.   addw	a0, a0, a0
+# CHECK-NEXT: [0,90]    .    .    .    .    .    .    .    .    .    .    .    .  D==============================================================eER   jr	a0
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       lui	a0, 1
+# CHECK-NEXT: 1.     1     1.0    1.0    0.0       auipc	a1, 1
+# CHECK-NEXT: 2.     1     2.0    0.0    0.0       add	a0, a0, a1
+# CHECK-NEXT: 3.     1     3.0    0.0    0.0       addi	a0, a0, 1
+# CHECK-NEXT: 4.     1     4.0    0.0    0.0       addw	a0, a0, a0
+# CHECK-NEXT: 5.     1     5.0    0.0    0.0       addiw	a0, a0, 1
+# CHECK-NEXT: 6.     1     5.0    0.0    0.0       sub	a0, a0, a0
+# CHECK-NEXT: 7.     1     6.0    0.0    0.0       subw	a0, a0, a0
+# CHECK-NEXT: 8.     1     7.0    0.0    0.0       and	a0, a0, a0
+# CHECK-NEXT: 9.     1     8.0    0.0    0.0       andi	a0, a0, 1
+# CHECK-NEXT: 10.    1     9.0    0.0    0.0       or	a0, a0, a0
+# CHECK-NEXT: 11.    1     10.0   0.0    0.0       ori	a0, a0, 1
+# CHECK-NEXT: 12.    1     10.0   0.0    0.0       xor	a0, a0, a0
+# CHECK-NEXT: 13.    1     11.0   0.0    0.0       xori	a0, a0, 1
+# CHECK-NEXT: 14.    1     12.0   0.0    0.0       sll	a0, a0, a0
+# CHECK-NEXT: 15.    1     13.0   0.0    0.0       slli	a0, a0, 1
+# CHECK-NEXT: 16.    1     14.0   0.0    0.0       sllw	a0, a0, a0
+# CHECK-NEXT: 17.    1     15.0   0.0    0.0       slliw	a0, a0, 1
+# CHECK-NEXT: 18.    1     15.0   0.0    0.0       srl	a0, a0, a0
+# CHECK-NEXT: 19.    1     16.0   0.0    0.0       srli	a0, a0, 1
+# CHECK-NEXT: 20.    1     16.0   0.0    0.0       srlw	a0, a0, a0
+# CHECK-NEXT: 21.    1     16.0   0.0    0.0       srliw	a0, a0, 1
+# CHECK-NEXT: 22.    1     16.0   0.0    0.0       sra	a0, a0, a0
+# CHECK-NEXT: 23.    1     16.0   0.0    0.0       srai	a0, a0, 1
+# CHECK-NEXT: 24.    1     16.0   0.0    0.0       sraw	a0, a0, a0
+# CHECK-NEXT: 25.    1     16.0   0.0    0.0       sraiw	a0, a0, 1
+# CHECK-NEXT: 26.    1     16.0   0.0    0.0       slt	a0, a0, a0
+# CHECK-NEXT: 27.    1     16.0   0.0    0.0       slti	a0, a0, 1
+# CHECK-NEXT: 28.    1     16.0   0.0    0.0       sltu	a0, a0, a0
+# CHECK-NEXT: 29.    1     16.0   0.0    0.0       seqz	a0, a0
+# CHECK-NEXT: 30.    1     17.0   0.0    0.0       mul	a0, a0, a0
+# CHECK-NEXT: 31.    1     19.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 32.    1     20.0   0.0    0.0       mulw	a0, a0, a0
+# CHECK-NEXT: 33.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 34.    1     23.0   0.0    0.0       beq	a0, a0, .Ltmp0
+# CHECK-NEXT: 35.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 36.    1     23.0   0.0    0.0       bne	a0, a0, .Ltmp1
+# CHECK-NEXT: 37.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 38.    1     23.0   0.0    0.0       blt	a0, a0, .Ltmp2
+# CHECK-NEXT: 39.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 40.    1     23.0   0.0    0.0       bltu	a0, a0, .Ltmp3
+# CHECK-NEXT: 41.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 42.    1     23.0   0.0    0.0       bge	a0, a0, .Ltmp4
+# CHECK-NEXT: 43.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 44.    1     23.0   0.0    0.0       bgeu	a0, a0, .Ltmp5
+# CHECK-NEXT: 45.    1     22.0   0.0    0.0       add.uw	a0, a0, a0
+# CHECK-NEXT: 46.    1     22.0   0.0    0.0       slli.uw	a0, a0, 1
+# CHECK-NEXT: 47.    1     22.0   0.0    0.0       sh1add.uw	a0, a0, a0
+# CHECK-NEXT: 48.    1     22.0   0.0    0.0       sh2add.uw	a0, a0, a0
+# CHECK-NEXT: 49.    1     22.0   0.0    0.0       sh3add.uw	a0, a0, a0
+# CHECK-NEXT: 50.    1     22.0   0.0    0.0       sh1add	a0, a0, a0
+# CHECK-NEXT: 51.    1     22.0   0.0    0.0       sh2add	a0, a0, a0
+# CHECK-NEXT: 52.    1     22.0   0.0    0.0       sh3add	a0, a0, a0
+# CHECK-NEXT: 53.    1     22.0   0.0    0.0       andn	a0, a0, a0
+# CHECK-NEXT: 54.    1     19.0   0.0    0.0       orn	a0, a0, a0
+# CHECK-NEXT: 55.    1     16.0   0.0    0.0       xnor	a0, a0, a0
+# CHECK-NEXT: 56.    1     16.0   0.0    0.0       sext.b	a0, a0
+# CHECK-NEXT: 57.    1     16.0   0.0    0.0       sext.h	a0, a0
+# CHECK-NEXT: 58.    1     16.0   0.0    0.0       zext.h	a0, a0
+# CHECK-NEXT: 59.    1     16.0   0.0    0.0       min	a0, a0, a0
+# CHECK-NEXT: 60.    1     16.0   0.0    0.0       minu	a0, a0, a0
+# CHECK-NEXT: 61.    1     16.0   0.0    0.0       max	a0, a0, a0
+# CHECK-NEXT: 62.    1     16.0   0.0    0.0       maxu	a0, a0, a0
+# CHECK-NEXT: 63.    1     16.0   0.0    0.0       rol	a0, a0, a0
+# CHECK-NEXT: 64.    1     16.0   0.0    0.0       ror	a0, a0, a0
+# CHECK-NEXT: 65.    1     16.0   0.0    0.0       rori	a0, a0, 1
+# CHECK-NEXT: 66.    1     17.0   0.0    0.0       clz	a0, a0
+# CHECK-NEXT: 67.    1     20.0   0.0    0.0       clzw	a0, a0
+# CHECK-NEXT: 68.    1     23.0   0.0    0.0       ctz	a0, a0
+# CHECK-NEXT: 69.    1     26.0   0.0    0.0       ctzw	a0, a0
+# CHECK-NEXT: 70.    1     29.0   0.0    0.0       cpop	a0, a0
+# CHECK-NEXT: 71.    1     31.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 72.    1     32.0   0.0    0.0       cpopw	a0, a0
+# CHECK-NEXT: 73.    1     34.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 74.    1     34.0   0.0    0.0       rev8	a0, a0
+# CHECK-NEXT: 75.    1     34.0   0.0    0.0       orc.b	a0, a0
+# CHECK-NEXT: 76.    1     35.0   0.0    0.0       lb	a0, 0(a0)
+# CHECK-NEXT: 77.    1     38.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 78.    1     39.0   0.0    0.0       lh	a0, 0(a0)
+# CHECK-NEXT: 79.    1     42.0   0.0    0.0       and	a0, a0, a0
+# CHECK-NEXT: 80.    1     43.0   0.0    0.0       lw	a0, 0(a0)
+# CHECK-NEXT: 81.    1     46.0   0.0    0.0       or	a0, a0, a0
+# CHECK-NEXT: 82.    1     47.0   0.0    0.0       ld	a0, 0(a0)
+# CHECK-NEXT: 83.    1     50.0   0.0    0.0       xor	a0, a0, a0
+# CHECK-NEXT: 84.    1     51.0   0.0    0.0       lbu	a0, 0(a0)
+# CHECK-NEXT: 85.    1     54.0   0.0    0.0       addi	a0, a0, 1
+# CHECK-NEXT: 86.    1     55.0   0.0    0.0       lhu	a0, 0(a0)
+# CHECK-NEXT: 87.    1     58.0   0.0    0.0       sub	a0, a0, a0
+# CHECK-NEXT: 88.    1     59.0   0.0    0.0       lwu	a0, 0(a0)
+# CHECK-NEXT: 89.    1     62.0   0.0    0.0       addw	a0, a0, a0
+# CHECK-NEXT: 90.    1     63.0   0.0    0.0       jr	a0
+# CHECK-NEXT:        1     22.7   0.0    0.0       <total>
diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s
new file mode 100644
index 000000000000000..e1925e7647e33c6
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu -timeline -iterations=1 < %s | FileCheck %s
+
+# Test XiangShan load-to-use latency: a dependent ALU op issues 4 cycles after the load.
+ld a1, 0(a0)
+addi a2, a1, 1
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      8
+# CHECK-NEXT: Total uOps:        2
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    0.25
+# CHECK-NEXT: IPC:               0.25
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      5     0.50    *                   ld	a1, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        addi	a2, a1, 1
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - XS2ALU
+# CHECK-NEXT: [0.1] - XS2ALU
+# CHECK-NEXT: [0.2] - XS2ALU
+# CHECK-NEXT: [0.3] - XS2ALU
+# CHECK-NEXT: [1.0] - XS2FMAC
+# CHECK-NEXT: [1.1] - XS2FMAC
+# CHECK-NEXT: [1.2] - XS2FMAC
+# CHECK-NEXT: [1.3] - XS2FMAC
+# CHECK-NEXT: [2.0] - XS2FMISC
+# CHECK-NEXT: [2.1] - XS2FMISC
+# CHECK-NEXT: [3.0] - XS2LD
+# CHECK-NEXT: [3.1] - XS2LD
+# CHECK-NEXT: [4.0] - XS2MDU
+# CHECK-NEXT: [4.1] - XS2MDU
+# CHECK-NEXT: [5]   - XS2MISC
+# CHECK-NEXT: [6.0] - XS2ST
+# CHECK-NEXT: [6.1] - XS2ST
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -     1.00    -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]  Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     ld	a1, 0(a0)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     addi	a2, a1, 1
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     01234567
+
+# CHECK:      [0,0]     DeeeeeER   ld	a1, 0(a0)
+# CHECK-NEXT: [0,1]     D====eER   addi	a2, a1, 1
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       ld	a1, 0(a0)
+# CHECK-NEXT: 1.     1     5.0    0.0    0.0       addi	a2, a1, 1
+# CHECK-NEXT:        1     3.0    0.5    0.0       <total>



More information about the llvm-commits mailing list