[llvm] [RISCV] Add sched model for XiangShan-NanHu (PR #70232)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 10 23:15:36 PST 2024


https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/70232

>From ea3e3d5d3fc612e9650b4038cee4dffcb2182c5f Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 28 Oct 2023 20:46:37 +0800
Subject: [PATCH 1/6] [RISCV] Add sched model for XiangShan-NanHu

Co-authored-by: SForeKeeper <zkliu6 at gmail.com>
---
 llvm/lib/Target/RISCV/RISCV.td                |   1 +
 llvm/lib/Target/RISCV/RISCVProcessors.td      |   2 +-
 .../Target/RISCV/RISCVSchedXiangShanNanHu.td  | 307 ++++++++++
 .../llvm-mca/RISCV/XiangShan/cascade-fma.s    |  53 ++
 .../llvm-mca/RISCV/XiangShan/gpr-bypass.s     | 527 ++++++++++++++++++
 .../llvm-mca/RISCV/XiangShan/load-to-alu.s    |  73 +++
 6 files changed, 962 insertions(+), 1 deletion(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
 create mode 100644 llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s
 create mode 100644 llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s
 create mode 100644 llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s

diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 27d52c16a4f39d..575bd4c9d3561d 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -44,6 +44,7 @@ include "RISCVSchedRocket.td"
 include "RISCVSchedSiFive7.td"
 include "RISCVSchedSiFiveP400.td"
 include "RISCVSchedSyntacoreSCR1.td"
+include "RISCVSchedXiangShanNanHu.td"
 
 //===----------------------------------------------------------------------===//
 // RISC-V processors supported.
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 59bb811058d488..ec50668aaa1681 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -330,7 +330,7 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
                                               TuneLDADDFusion]>;
 
 def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
-                                          NoSchedModel,
+                                          XiangShanNanHuModel,
                                           [Feature64Bit,
                                            FeatureStdExtZicsr,
                                            FeatureStdExtZifencei,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
new file mode 100644
index 00000000000000..24b5922a23df14
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -0,0 +1,307 @@
+//==- RISCVSchedXiangShanNanHu.td - XiangShan-NanHu Scheduling Definitions --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-------------------------------------------------------------------------------------===//
+
+//===-------------------------------------------------------------------------------------===//
+
+// XiangShan is a high-performance open-source RISC-V processor developed by
+// the Institute of Computing Technology (ICT), Chinese Academy of Sciences.
+// Source: https://github.com/OpenXiangShan/XiangShan
+// Documentation: https://github.com/OpenXiangShan/XiangShan-doc
+
+// XiangShan-NanHu is the second generation of XiangShan processor series.
+// Overview: https://xiangshan-doc.readthedocs.io/zh-cn/latest/integration/overview/
+
+def XiangShanNanHuModel : SchedMachineModel {
+  let MicroOpBufferSize = 256;
+  let LoopMicroOpBufferSize = 48;  // Instruction queue size
+  let IssueWidth = 6;  // 6-way decode and dispatch
+  let LoadLatency = 4;
+  let MispredictPenalty = 11; // Based on estimate of pipeline depth.
+  let CompleteModel = 0;
+  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
+  let UnsupportedFeatures = [];
+}
+
+let SchedModel = XiangShanNanHuModel in {
+
+// The reservation stations are distributed and grouped as 32-entry or 16-entry smaller ones.
+let BufferSize = 16 in {
+  def XS2ALU : ProcResource<4>;
+  def XS2MDU : ProcResource<2>;
+  def XS2MISC : ProcResource<1>;
+
+  def XS2FMAC : ProcResource<4>;
+  def XS2FMISC : ProcResource<2>;
+
+  // Load/Store queues are ignored.
+  def XS2LD : ProcResource<2>;
+  def XS2ST : ProcResource<2>;
+}
+
+// Branching
+def : WriteRes<WriteJmp, [XS2MISC]>;
+def : WriteRes<WriteJal, [XS2MISC]>;
+def : WriteRes<WriteJalr, [XS2MISC]>;
+
+// Integer arithmetic and logic
+let Latency = 1 in {
+def : WriteRes<WriteIALU, [XS2ALU]>;
+def : WriteRes<WriteIALU32, [XS2ALU]>;
+def : WriteRes<WriteShiftImm, [XS2ALU]>;
+def : WriteRes<WriteShiftImm32, [XS2ALU]>;
+def : WriteRes<WriteShiftReg, [XS2ALU]>;
+def : WriteRes<WriteShiftReg32, [XS2ALU]>;
+}
+
+// Integer multiplication
+let Latency = 3 in {
+def : WriteRes<WriteIMul, [XS2MDU]>;
+def : WriteRes<WriteIMul32, [XS2MDU]>;
+}
+
+// Integer division
+// SRT16 algorithm
+let Latency = 20, ReleaseAtCycles = [20] in {
+def : WriteRes<WriteIDiv32, [XS2MDU]>;
+def : WriteRes<WriteIDiv, [XS2MDU]>;
+}
+
+// Zb*
+let Latency = 1 in {
+// Zba
+def : WriteRes<WriteSHXADD, [XS2ALU]>;
+def : WriteRes<WriteSHXADD32, [XS2ALU]>;
+
+// Zbb
+def : WriteRes<WriteRotateImm, [XS2ALU]>;
+def : WriteRes<WriteRotateImm32, [XS2ALU]>;
+def : WriteRes<WriteRotateReg, [XS2ALU]>;
+def : WriteRes<WriteRotateReg32, [XS2ALU]>;
+def : WriteRes<WriteORCB, [XS2ALU]>;
+def : WriteRes<WriteREV8, [XS2ALU]>;
+
+// Zbkb
+def : WriteRes<WriteBREV8, [XS2ALU]>;
+def : WriteRes<WritePACK, [XS2ALU]>;
+def : WriteRes<WritePACK32, [XS2ALU]>;
+def : WriteRes<WriteZIP, [XS2ALU]>;
+}
+
+let Latency = 3 in {
+// Zbb
+def : WriteRes<WriteCLZ, [XS2MDU]>;
+def : WriteRes<WriteCLZ32, [XS2MDU]>;
+def : WriteRes<WriteCTZ, [XS2MDU]>;
+def : WriteRes<WriteCTZ32, [XS2MDU]>;
+def : WriteRes<WriteCPOP, [XS2MDU]>;
+def : WriteRes<WriteCPOP32, [XS2MDU]>;
+
+// Zbs
+def : WriteRes<WriteSingleBit, [XS2MDU]>;
+def : WriteRes<WriteSingleBitImm, [XS2MDU]>;
+def : WriteRes<WriteBEXT, [XS2MDU]>;
+def : WriteRes<WriteBEXTI, [XS2MDU]>;
+
+// Zbkc
+def : WriteRes<WriteCLMUL, [XS2MDU]>;
+
+// Zbkx
+def : WriteRes<WriteXPERM, [XS2MDU]>;
+}
+
+// Memory
+def : WriteRes<WriteSTB, [XS2ST]>;
+def : WriteRes<WriteSTH, [XS2ST]>;
+def : WriteRes<WriteSTW, [XS2ST]>;
+def : WriteRes<WriteSTD, [XS2ST]>;
+def : WriteRes<WriteFST32, [XS2ST]>;
+def : WriteRes<WriteFST64, [XS2ST]>;
+def : WriteRes<WriteAtomicSTW, [XS2ST]>;
+def : WriteRes<WriteAtomicSTD, [XS2ST]>;
+
+let Latency = 5 in {
+def : WriteRes<WriteLDB, [XS2LD]>;
+def : WriteRes<WriteLDH, [XS2LD]>;
+def : WriteRes<WriteLDW, [XS2LD]>;
+def : WriteRes<WriteLDD, [XS2LD]>;
+
+def : WriteRes<WriteAtomicW, [XS2LD]>;
+def : WriteRes<WriteAtomicD, [XS2LD]>;
+def : WriteRes<WriteAtomicLDW, [XS2LD]>;
+def : WriteRes<WriteAtomicLDD, [XS2LD]>;
+
+def : WriteRes<WriteFLD32, [XS2LD]>;
+def : WriteRes<WriteFLD64, [XS2LD]>;
+}
+
+// XiangShan-NanHu uses FuDian FPU instead of Berkeley HardFloat.
+// Documentation: https://github.com/OpenXiangShan/fudian
+
+let Latency = 3 in {
+def : WriteRes<WriteFAdd32, [XS2FMAC]>;
+def : WriteRes<WriteFSGNJ32, [XS2FMAC]>;
+def : WriteRes<WriteFMinMax32, [XS2FMAC]>;
+def : WriteRes<WriteFAdd64, [XS2FMAC]>;
+def : WriteRes<WriteFSGNJ64, [XS2FMAC]>;
+def : WriteRes<WriteFMinMax64, [XS2FMAC]>;
+
+def : WriteRes<WriteFCvtI32ToF32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtI32ToF64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtI64ToF32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtI64ToF64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF32ToI32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF32ToI64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF64ToI32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF64ToI64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF32ToF64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF64ToF32, [XS2FMAC]>;
+
+def : WriteRes<WriteFClass32, [XS2FMAC]>;
+def : WriteRes<WriteFClass64, [XS2FMAC]>;
+def : WriteRes<WriteFCmp32, [XS2FMAC]>;
+def : WriteRes<WriteFCmp64, [XS2FMAC]>;
+def : WriteRes<WriteFMovF32ToI32, [XS2FMAC]>;
+def : WriteRes<WriteFMovI32ToF32, [XS2FMAC]>;
+def : WriteRes<WriteFMovF64ToI64, [XS2FMAC]>;
+def : WriteRes<WriteFMovI64ToF64, [XS2FMAC]>;
+}
+
+// FP multiplication
+let Latency = 3 in {
+def : WriteRes<WriteFMul32, [XS2FMAC]>;
+def : WriteRes<WriteFMul64, [XS2FMAC]>;
+}
+
+let Latency = 5 in {
+def : WriteRes<WriteFMA32, [XS2FMAC]>;
+def : WriteRes<WriteFMA64, [XS2FMAC]>;
+}
+
+// FP division
+def : WriteRes<WriteFDiv32, [XS2FMISC]> {
+    let Latency = 11;
+}
+def : WriteRes<WriteFDiv64, [XS2FMISC]> {
+    let Latency = 18;
+}
+
+def : WriteRes<WriteFSqrt32, [XS2FMISC]> {
+    let Latency = 17;
+}
+def : WriteRes<WriteFSqrt64, [XS2FMISC]> {
+    let Latency = 31;
+}
+
+// Others
+def : WriteRes<WriteCSR, [XS2MISC]>;
+def : WriteRes<WriteNop, []>;
+
+def : InstRW<[WriteIALU], (instrs COPY)>;
+
+// Bypass and advance
+
+class XS2LoadToALUBypass<SchedRead read>
+    : ReadAdvance<read, 1, [WriteLDB, WriteLDH, WriteLDW, WriteLDD, WriteAtomicW, WriteAtomicD, WriteAtomicLDW, WriteAtomicLDD]>;
+
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : XS2LoadToALUBypass<ReadIALU>;
+def : XS2LoadToALUBypass<ReadIALU32>;
+def : XS2LoadToALUBypass<ReadShiftImm>;
+def : XS2LoadToALUBypass<ReadShiftImm32>;
+def : XS2LoadToALUBypass<ReadShiftReg>;
+def : XS2LoadToALUBypass<ReadShiftReg32>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFStoreData, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
+def : ReadAdvance<ReadFAdd32, 0>;
+def : ReadAdvance<ReadFAdd64, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 2>; // Cascade FMA
+def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 2>; // Cascade FMA
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+
+// Zb*
+// Zba
+def : XS2LoadToALUBypass<ReadSHXADD>;
+def : XS2LoadToALUBypass<ReadSHXADD32>;
+// Zbb
+def : XS2LoadToALUBypass<ReadRotateImm>;
+def : XS2LoadToALUBypass<ReadRotateImm32>;
+def : XS2LoadToALUBypass<ReadRotateReg>;
+def : XS2LoadToALUBypass<ReadRotateReg32>;
+def : ReadAdvance<ReadCLZ, 0>;
+def : ReadAdvance<ReadCLZ32, 0>;
+def : ReadAdvance<ReadCTZ, 0>;
+def : ReadAdvance<ReadCTZ32, 0>;
+def : ReadAdvance<ReadCPOP, 0>;
+def : ReadAdvance<ReadCPOP32, 0>;
+def : XS2LoadToALUBypass<ReadORCB>;
+def : XS2LoadToALUBypass<ReadREV8>;
+// Zbkc
+def : ReadAdvance<ReadCLMUL, 0>;
+// Zbs
+def : ReadAdvance<ReadSingleBit, 0>;
+def : ReadAdvance<ReadSingleBitImm, 0>;
+// Zbkb
+def : XS2LoadToALUBypass<ReadBREV8>;
+def : XS2LoadToALUBypass<ReadPACK>;
+def : XS2LoadToALUBypass<ReadPACK32>;
+def : XS2LoadToALUBypass<ReadZIP>;
+// Zbkx
+def : ReadAdvance<ReadXPERM, 0>;
+
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedV;
+defm : UnsupportedSchedZfa;
+defm : UnsupportedSchedZfh;
+defm : UnsupportedSchedSFB;
+}
diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s
new file mode 100644
index 00000000000000..d44eb55ebf7598
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s
@@ -0,0 +1,53 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu < %s | FileCheck %s
+
+# Test XiangShan FuDian's cascade FMA, CPI = 3
+fmadd.s fa0, fa1, fa2, fa0
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      100
+# CHECK-NEXT: Total Cycles:      305
+# CHECK-NEXT: Total uOps:        100
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    0.33
+# CHECK-NEXT: IPC:               0.33
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      5     0.25                        fmadd.s	fa0, fa1, fa2, fa0
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - XS2ALU
+# CHECK-NEXT: [0.1] - XS2ALU
+# CHECK-NEXT: [0.2] - XS2ALU
+# CHECK-NEXT: [0.3] - XS2ALU
+# CHECK-NEXT: [1.0] - XS2FMAC
+# CHECK-NEXT: [1.1] - XS2FMAC
+# CHECK-NEXT: [1.2] - XS2FMAC
+# CHECK-NEXT: [1.3] - XS2FMAC
+# CHECK-NEXT: [2.0] - XS2FMISC
+# CHECK-NEXT: [2.1] - XS2FMISC
+# CHECK-NEXT: [3.0] - XS2LD
+# CHECK-NEXT: [3.1] - XS2LD
+# CHECK-NEXT: [4.0] - XS2MDU
+# CHECK-NEXT: [4.1] - XS2MDU
+# CHECK-NEXT: [5]   - XS2MISC
+# CHECK-NEXT: [6.0] - XS2ST
+# CHECK-NEXT: [6.1] - XS2ST
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]
+# CHECK-NEXT:  -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]  Instructions:
+# CHECK-NEXT:  -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -     fmadd.s	fa0, fa1, fa2, fa0
diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s
new file mode 100644
index 00000000000000..677fece1535a04
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s
@@ -0,0 +1,527 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu -timeline \
+# RUN:   -timeline-max-cycles=1000 -iterations=1 < %s | FileCheck %s
+
+lui a0, 1
+auipc a1, 1
+add a0, a0, a1
+addi a0, a0, 1
+addw a0, a0, a0
+addiw a0, a0, 1
+sub a0, a0, a0
+subw a0, a0, a0
+and a0, a0, a0
+andi a0, a0, 1
+or a0, a0, a0
+ori a0, a0, 1
+xor a0, a0, a0
+xori a0, a0, 1
+sll a0, a0, a0
+slli a0, a0, 1
+sllw a0, a0, a0
+slliw a0, a0, 1
+srl a0, a0, a0
+srli a0, a0, 1
+srlw a0, a0, a0
+srliw a0, a0, 1
+sra a0, a0, a0
+srai a0, a0, 1
+sraw a0, a0, a0
+sraiw a0, a0, 1
+slt a0, a0, a0
+slti a0, a0, 1
+sltu a0, a0, a0
+sltiu a0, a0, 1
+mul a0, a0, a0
+add a0, a0, a0
+mulw a0, a0, a0
+add a0, a0, a0
+beq a0, a0, 1f
+1:
+add a0, a0, a0
+bne a0, a0, 1f
+1:
+add a0, a0, a0
+blt a0, a0, 1f
+1:
+add a0, a0, a0
+bltu a0, a0, 1f
+1:
+add a0, a0, a0
+bge a0, a0, 1f
+1:
+add a0, a0, a0
+bgeu a0, a0, 1f
+1:
+# zba
+add.uw a0, a0, a0
+slli.uw a0, a0, 1
+sh1add.uw a0, a0, a0
+sh2add.uw a0, a0, a0
+sh3add.uw a0, a0, a0
+sh1add a0, a0, a0
+sh2add a0, a0, a0
+sh3add a0, a0, a0
+# zbb
+andn a0, a0, a0
+orn a0, a0, a0
+xnor a0, a0, a0
+sext.b a0, a0
+sext.h a0, a0
+zext.h a0, a0
+min a0, a0, a0
+minu a0, a0, a0
+max a0, a0, a0
+maxu a0, a0, a0
+rol a0, a0, a0
+ror a0, a0, a0
+rori a0, a0, 1
+clz a0, a0
+clzw a0, a0
+ctz a0, a0
+ctzw a0, a0
+cpop a0, a0
+add a0, a0, a0
+cpopw a0, a0
+add a0, a0, a0
+rev8 a0, a0
+orc.b a0, a0
+lb a0, 0(a0)
+add a0, a0, a0
+lh a0, 0(a0)
+and a0, a0, a0
+lw a0, 0(a0)
+or a0, a0, a0
+ld a0, 0(a0)
+xor a0, a0, a0
+lbu a0, 0(a0)
+addi a0, a0, 1
+lhu a0, 0(a0)
+sub a0, a0, a0
+lwu a0, 0(a0)
+addw a0, a0, a0
+jr a0
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      91
+# CHECK-NEXT: Total Cycles:      124
+# CHECK-NEXT: Total uOps:        91
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    0.73
+# CHECK-NEXT: IPC:               0.73
+# CHECK-NEXT: Block RThroughput: 17.3
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.25                        lui	a0, 1
+# CHECK-NEXT:  1      1     0.25                        auipc	a1, 1
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a1
+# CHECK-NEXT:  1      1     0.25                        addi	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        addw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        addiw	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sub	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        subw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        and	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        andi	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        or	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        ori	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        xor	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        xori	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sll	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        slli	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sllw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        slliw	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        srl	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        srli	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        srlw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        srliw	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sra	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        srai	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sraw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sraiw	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        slt	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        slti	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sltu	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        seqz	a0, a0
+# CHECK-NEXT:  1      3     0.50                        mul	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      3     0.50                        mulw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        beq	a0, a0, .Ltmp0
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        bne	a0, a0, .Ltmp1
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        blt	a0, a0, .Ltmp2
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        bltu	a0, a0, .Ltmp3
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        bge	a0, a0, .Ltmp4
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        bgeu	a0, a0, .Ltmp5
+# CHECK-NEXT:  1      1     0.25                        add.uw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        slli.uw	a0, a0, 1
+# CHECK-NEXT:  1      1     0.25                        sh1add.uw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sh2add.uw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sh3add.uw	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sh1add	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sh2add	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sh3add	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        andn	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        orn	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        xnor	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        sext.b	a0, a0
+# CHECK-NEXT:  1      1     0.25                        sext.h	a0, a0
+# CHECK-NEXT:  1      1     0.25                        zext.h	a0, a0
+# CHECK-NEXT:  1      1     0.25                        min	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        minu	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        max	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        maxu	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        rol	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        ror	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        rori	a0, a0, 1
+# CHECK-NEXT:  1      3     0.50                        clz	a0, a0
+# CHECK-NEXT:  1      3     0.50                        clzw	a0, a0
+# CHECK-NEXT:  1      3     0.50                        ctz	a0, a0
+# CHECK-NEXT:  1      3     0.50                        ctzw	a0, a0
+# CHECK-NEXT:  1      3     0.50                        cpop	a0, a0
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      3     0.50                        cpopw	a0, a0
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      1     0.25                        rev8	a0, a0
+# CHECK-NEXT:  1      1     0.25                        orc.b	a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   lb	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        add	a0, a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   lh	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        and	a0, a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   lw	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        or	a0, a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   ld	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        xor	a0, a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   lbu	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        addi	a0, a0, 1
+# CHECK-NEXT:  1      5     0.50    *                   lhu	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        sub	a0, a0, a0
+# CHECK-NEXT:  1      5     0.50    *                   lwu	a0, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        addw	a0, a0, a0
+# CHECK-NEXT:  1      1     1.00                        jr	a0
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - XS2ALU
+# CHECK-NEXT: [0.1] - XS2ALU
+# CHECK-NEXT: [0.2] - XS2ALU
+# CHECK-NEXT: [0.3] - XS2ALU
+# CHECK-NEXT: [1.0] - XS2FMAC
+# CHECK-NEXT: [1.1] - XS2FMAC
+# CHECK-NEXT: [1.2] - XS2FMAC
+# CHECK-NEXT: [1.3] - XS2FMAC
+# CHECK-NEXT: [2.0] - XS2FMISC
+# CHECK-NEXT: [2.1] - XS2FMISC
+# CHECK-NEXT: [3.0] - XS2LD
+# CHECK-NEXT: [3.1] - XS2LD
+# CHECK-NEXT: [4.0] - XS2MDU
+# CHECK-NEXT: [4.1] - XS2MDU
+# CHECK-NEXT: [5]   - XS2MISC
+# CHECK-NEXT: [6.0] - XS2ST
+# CHECK-NEXT: [6.1] - XS2ST
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]
+# CHECK-NEXT: 17.00  17.00  17.00  18.00   -      -      -      -      -      -     3.00   4.00   4.00   4.00   7.00    -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]  Instructions:
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     lui	a0, 1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     auipc	a1, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a1
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addi	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     addw	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     addiw	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sub	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     subw	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     and	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     andi	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     or	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     ori	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     xor	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     xori	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sll	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     slli	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sllw	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     slliw	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     srl	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     srli	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     srlw	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     srliw	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sra	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     srai	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sraw	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     sraiw	a0, a0, 1
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     slt	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     slti	a0, a0, 1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sltu	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     seqz	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -     mul	a0, a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     mulw	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     beq	a0, a0, .Ltmp0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     bne	a0, a0, .Ltmp1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     blt	a0, a0, .Ltmp2
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     bltu	a0, a0, .Ltmp3
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     bge	a0, a0, .Ltmp4
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     bgeu	a0, a0, .Ltmp5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     add.uw	a0, a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     slli.uw	a0, a0, 1
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sh1add.uw	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sh2add.uw	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     sh3add.uw	a0, a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sh1add	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sh2add	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sh3add	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     andn	a0, a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     orn	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     xnor	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     sext.b	a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     sext.h	a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     zext.h	a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     min	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     minu	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     max	a0, a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     maxu	a0, a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     rol	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     ror	a0, a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     rori	a0, a0, 1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -     clz	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     clzw	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -     ctz	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     ctzw	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -     cpop	a0, a0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     cpopw	a0, a0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     rev8	a0, a0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     orc.b	a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     lb	a0, 0(a0)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     add	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -      -     lh	a0, 0(a0)
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     and	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     lw	a0, 0(a0)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     or	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -      -     ld	a0, 0(a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -     xor	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     lbu	a0, 0(a0)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addi	a0, a0, 1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -      -     lhu	a0, 0(a0)
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sub	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     lwu	a0, 0(a0)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     addw	a0, a0, a0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     jr	a0
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0123456789          0123456789          0123456789          0123456789          0123456789          0123456789
+# CHECK-NEXT: Index     0123456789          0123456789          0123456789          0123456789          0123456789          0123456789          0123
+
+# CHECK:      [0,0]     DeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   lui	a0, 1
+# CHECK-NEXT: [0,1]     DeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   auipc	a1, 1
+# CHECK-NEXT: [0,2]     D=eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a1
+# CHECK-NEXT: [0,3]     D==eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   addi	a0, a0, 1
+# CHECK-NEXT: [0,4]     D===eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   addw	a0, a0, a0
+# CHECK-NEXT: [0,5]     D====eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   addiw	a0, a0, 1
+# CHECK-NEXT: [0,6]     .D====eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sub	a0, a0, a0
+# CHECK-NEXT: [0,7]     .D=====eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   subw	a0, a0, a0
+# CHECK-NEXT: [0,8]     .D======eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   and	a0, a0, a0
+# CHECK-NEXT: [0,9]     .D=======eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   andi	a0, a0, 1
+# CHECK-NEXT: [0,10]    .D========eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   or	a0, a0, a0
+# CHECK-NEXT: [0,11]    .D=========eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   ori	a0, a0, 1
+# CHECK-NEXT: [0,12]    . D=========eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   xor	a0, a0, a0
+# CHECK-NEXT: [0,13]    . D==========eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   xori	a0, a0, 1
+# CHECK-NEXT: [0,14]    . D===========eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sll	a0, a0, a0
+# CHECK-NEXT: [0,15]    . D============eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   slli	a0, a0, 1
+# CHECK-NEXT: [0,16]    . D=============eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sllw	a0, a0, a0
+# CHECK-NEXT: [0,17]    . D==============eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   slliw	a0, a0, 1
+# CHECK-NEXT: [0,18]    .  D==============eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   srl	a0, a0, a0
+# CHECK-NEXT: [0,19]    .  D===============eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   srli	a0, a0, 1
+# CHECK-NEXT: [0,20]    .   D===============eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   srlw	a0, a0, a0
+# CHECK-NEXT: [0,21]    .    D===============eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   srliw	a0, a0, 1
+# CHECK-NEXT: [0,22]    .    .D===============eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sra	a0, a0, a0
+# CHECK-NEXT: [0,23]    .    . D===============eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   srai	a0, a0, 1
+# CHECK-NEXT: [0,24]    .    .  D===============eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sraw	a0, a0, a0
+# CHECK-NEXT: [0,25]    .    .   D===============eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sraiw	a0, a0, 1
+# CHECK-NEXT: [0,26]    .    .    D===============eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   slt	a0, a0, a0
+# CHECK-NEXT: [0,27]    .    .    .D===============eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   slti	a0, a0, 1
+# CHECK-NEXT: [0,28]    .    .    . D===============eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sltu	a0, a0, a0
+# CHECK-NEXT: [0,29]    .    .    .  D===============eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   seqz	a0, a0
+# CHECK-NEXT: [0,30]    .    .    .  D================eeeER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   mul	a0, a0, a0
+# CHECK-NEXT: [0,31]    .    .    .   D==================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,32]    .    .    .   D===================eeeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   mulw	a0, a0, a0
+# CHECK-NEXT: [0,33]    .    .    .    D=====================eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,34]    .    .    .    D======================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   beq	a0, a0, .Ltmp0
+# CHECK-NEXT: [0,35]    .    .    .    .D=====================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,36]    .    .    .    .D======================eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   bne	a0, a0, .Ltmp1
+# CHECK-NEXT: [0,37]    .    .    .    . D=====================eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,38]    .    .    .    . D======================eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   blt	a0, a0, .Ltmp2
+# CHECK-NEXT: [0,39]    .    .    .    .  D=====================eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,40]    .    .    .    .  D======================eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   bltu	a0, a0, .Ltmp3
+# CHECK-NEXT: [0,41]    .    .    .    .   D=====================eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,42]    .    .    .    .   D======================eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   bge	a0, a0, .Ltmp4
+# CHECK-NEXT: [0,43]    .    .    .    .    D=====================eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,44]    .    .    .    .    D======================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   bgeu	a0, a0, .Ltmp5
+# CHECK-NEXT: [0,45]    .    .    .    .    .D=====================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   add.uw	a0, a0, a0
+# CHECK-NEXT: [0,46]    .    .    .    .    . D=====================eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   slli.uw	a0, a0, 1
+# CHECK-NEXT: [0,47]    .    .    .    .    .  D=====================eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh1add.uw	a0, a0, a0
+# CHECK-NEXT: [0,48]    .    .    .    .    .   D=====================eER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh2add.uw	a0, a0, a0
+# CHECK-NEXT: [0,49]    .    .    .    .    .    D=====================eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh3add.uw	a0, a0, a0
+# CHECK-NEXT: [0,50]    .    .    .    .    .    .D=====================eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh1add	a0, a0, a0
+# CHECK-NEXT: [0,51]    .    .    .    .    .    . D=====================eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh2add	a0, a0, a0
+# CHECK-NEXT: [0,52]    .    .    .    .    .    .  D=====================eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   sh3add	a0, a0, a0
+# CHECK-NEXT: [0,53]    .    .    .    .    .    .   D=====================eER .    .    .    .    .    .    .    .    .    .    .    .    .    .  .   andn	a0, a0, a0
+# CHECK-NEXT: [0,54]    .    .    .    .    .    .    .  D==================eER.    .    .    .    .    .    .    .    .    .    .    .    .    .  .   orn	a0, a0, a0
+# CHECK-NEXT: [0,55]    .    .    .    .    .    .    .    . D===============eER    .    .    .    .    .    .    .    .    .    .    .    .    .  .   xnor	a0, a0, a0
+# CHECK-NEXT: [0,56]    .    .    .    .    .    .    .    .  D===============eER   .    .    .    .    .    .    .    .    .    .    .    .    .  .   sext.b	a0, a0
+# CHECK-NEXT: [0,57]    .    .    .    .    .    .    .    .   D===============eER  .    .    .    .    .    .    .    .    .    .    .    .    .  .   sext.h	a0, a0
+# CHECK-NEXT: [0,58]    .    .    .    .    .    .    .    .    D===============eER .    .    .    .    .    .    .    .    .    .    .    .    .  .   zext.h	a0, a0
+# CHECK-NEXT: [0,59]    .    .    .    .    .    .    .    .    .D===============eER.    .    .    .    .    .    .    .    .    .    .    .    .  .   min	a0, a0, a0
+# CHECK-NEXT: [0,60]    .    .    .    .    .    .    .    .    . D===============eER    .    .    .    .    .    .    .    .    .    .    .    .  .   minu	a0, a0, a0
+# CHECK-NEXT: [0,61]    .    .    .    .    .    .    .    .    .  D===============eER   .    .    .    .    .    .    .    .    .    .    .    .  .   max	a0, a0, a0
+# CHECK-NEXT: [0,62]    .    .    .    .    .    .    .    .    .   D===============eER  .    .    .    .    .    .    .    .    .    .    .    .  .   maxu	a0, a0, a0
+# CHECK-NEXT: [0,63]    .    .    .    .    .    .    .    .    .    D===============eER .    .    .    .    .    .    .    .    .    .    .    .  .   rol	a0, a0, a0
+# CHECK-NEXT: [0,64]    .    .    .    .    .    .    .    .    .    .D===============eER.    .    .    .    .    .    .    .    .    .    .    .  .   ror	a0, a0, a0
+# CHECK-NEXT: [0,65]    .    .    .    .    .    .    .    .    .    . D===============eER    .    .    .    .    .    .    .    .    .    .    .  .   rori	a0, a0, 1
+# CHECK-NEXT: [0,66]    .    .    .    .    .    .    .    .    .    . D================eeeER .    .    .    .    .    .    .    .    .    .    .  .   clz	a0, a0
+# CHECK-NEXT: [0,67]    .    .    .    .    .    .    .    .    .    . D===================eeeER   .    .    .    .    .    .    .    .    .    .  .   clzw	a0, a0
+# CHECK-NEXT: [0,68]    .    .    .    .    .    .    .    .    .    . D======================eeeER.    .    .    .    .    .    .    .    .    .  .   ctz	a0, a0
+# CHECK-NEXT: [0,69]    .    .    .    .    .    .    .    .    .    . D=========================eeeER  .    .    .    .    .    .    .    .    .  .   ctzw	a0, a0
+# CHECK-NEXT: [0,70]    .    .    .    .    .    .    .    .    .    . D============================eeeER    .    .    .    .    .    .    .    .  .   cpop	a0, a0
+# CHECK-NEXT: [0,71]    .    .    .    .    .    .    .    .    .    .  D==============================eER   .    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,72]    .    .    .    .    .    .    .    .    .    .  D===============================eeeER.    .    .    .    .    .    .    .  .   cpopw	a0, a0
+# CHECK-NEXT: [0,73]    .    .    .    .    .    .    .    .    .    .   D=================================eER    .    .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,74]    .    .    .    .    .    .    .    .    .    .    D=================================eER   .    .    .    .    .    .    .  .   rev8	a0, a0
+# CHECK-NEXT: [0,75]    .    .    .    .    .    .    .    .    .    .    .D=================================eER  .    .    .    .    .    .    .  .   orc.b	a0, a0
+# CHECK-NEXT: [0,76]    .    .    .    .    .    .    .    .    .    .    .D==================================eeeeeER  .    .    .    .    .    .  .   lb	a0, 0(a0)
+# CHECK-NEXT: [0,77]    .    .    .    .    .    .    .    .    .    .    . D=====================================eER  .    .    .    .    .    .  .   add	a0, a0, a0
+# CHECK-NEXT: [0,78]    .    .    .    .    .    .    .    .    .    .    . D======================================eeeeeER  .    .    .    .    .  .   lh	a0, 0(a0)
+# CHECK-NEXT: [0,79]    .    .    .    .    .    .    .    .    .    .    .  D=========================================eER  .    .    .    .    .  .   and	a0, a0, a0
+# CHECK-NEXT: [0,80]    .    .    .    .    .    .    .    .    .    .    .  D==========================================eeeeeER  .    .    .    .  .   lw	a0, 0(a0)
+# CHECK-NEXT: [0,81]    .    .    .    .    .    .    .    .    .    .    .   D=============================================eER  .    .    .    .  .   or	a0, a0, a0
+# CHECK-NEXT: [0,82]    .    .    .    .    .    .    .    .    .    .    .   D==============================================eeeeeER  .    .    .  .   ld	a0, 0(a0)
+# CHECK-NEXT: [0,83]    .    .    .    .    .    .    .    .    .    .    .    D=================================================eER  .    .    .  .   xor	a0, a0, a0
+# CHECK-NEXT: [0,84]    .    .    .    .    .    .    .    .    .    .    .    D==================================================eeeeeER  .    .  .   lbu	a0, 0(a0)
+# CHECK-NEXT: [0,85]    .    .    .    .    .    .    .    .    .    .    .    .D=====================================================eER  .    .  .   addi	a0, a0, 1
+# CHECK-NEXT: [0,86]    .    .    .    .    .    .    .    .    .    .    .    .D======================================================eeeeeER  .  .   lhu	a0, 0(a0)
+# CHECK-NEXT: [0,87]    .    .    .    .    .    .    .    .    .    .    .    . D=========================================================eER  .  .   sub	a0, a0, a0
+# CHECK-NEXT: [0,88]    .    .    .    .    .    .    .    .    .    .    .    . D==========================================================eeeeeER.   lwu	a0, 0(a0)
+# CHECK-NEXT: [0,89]    .    .    .    .    .    .    .    .    .    .    .    .  D=============================================================eER.   addw	a0, a0, a0
+# CHECK-NEXT: [0,90]    .    .    .    .    .    .    .    .    .    .    .    .  D==============================================================eER   jr	a0
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       lui	a0, 1
+# CHECK-NEXT: 1.     1     1.0    1.0    0.0       auipc	a1, 1
+# CHECK-NEXT: 2.     1     2.0    0.0    0.0       add	a0, a0, a1
+# CHECK-NEXT: 3.     1     3.0    0.0    0.0       addi	a0, a0, 1
+# CHECK-NEXT: 4.     1     4.0    0.0    0.0       addw	a0, a0, a0
+# CHECK-NEXT: 5.     1     5.0    0.0    0.0       addiw	a0, a0, 1
+# CHECK-NEXT: 6.     1     5.0    0.0    0.0       sub	a0, a0, a0
+# CHECK-NEXT: 7.     1     6.0    0.0    0.0       subw	a0, a0, a0
+# CHECK-NEXT: 8.     1     7.0    0.0    0.0       and	a0, a0, a0
+# CHECK-NEXT: 9.     1     8.0    0.0    0.0       andi	a0, a0, 1
+# CHECK-NEXT: 10.    1     9.0    0.0    0.0       or	a0, a0, a0
+# CHECK-NEXT: 11.    1     10.0   0.0    0.0       ori	a0, a0, 1
+# CHECK-NEXT: 12.    1     10.0   0.0    0.0       xor	a0, a0, a0
+# CHECK-NEXT: 13.    1     11.0   0.0    0.0       xori	a0, a0, 1
+# CHECK-NEXT: 14.    1     12.0   0.0    0.0       sll	a0, a0, a0
+# CHECK-NEXT: 15.    1     13.0   0.0    0.0       slli	a0, a0, 1
+# CHECK-NEXT: 16.    1     14.0   0.0    0.0       sllw	a0, a0, a0
+# CHECK-NEXT: 17.    1     15.0   0.0    0.0       slliw	a0, a0, 1
+# CHECK-NEXT: 18.    1     15.0   0.0    0.0       srl	a0, a0, a0
+# CHECK-NEXT: 19.    1     16.0   0.0    0.0       srli	a0, a0, 1
+# CHECK-NEXT: 20.    1     16.0   0.0    0.0       srlw	a0, a0, a0
+# CHECK-NEXT: 21.    1     16.0   0.0    0.0       srliw	a0, a0, 1
+# CHECK-NEXT: 22.    1     16.0   0.0    0.0       sra	a0, a0, a0
+# CHECK-NEXT: 23.    1     16.0   0.0    0.0       srai	a0, a0, 1
+# CHECK-NEXT: 24.    1     16.0   0.0    0.0       sraw	a0, a0, a0
+# CHECK-NEXT: 25.    1     16.0   0.0    0.0       sraiw	a0, a0, 1
+# CHECK-NEXT: 26.    1     16.0   0.0    0.0       slt	a0, a0, a0
+# CHECK-NEXT: 27.    1     16.0   0.0    0.0       slti	a0, a0, 1
+# CHECK-NEXT: 28.    1     16.0   0.0    0.0       sltu	a0, a0, a0
+# CHECK-NEXT: 29.    1     16.0   0.0    0.0       seqz	a0, a0
+# CHECK-NEXT: 30.    1     17.0   0.0    0.0       mul	a0, a0, a0
+# CHECK-NEXT: 31.    1     19.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 32.    1     20.0   0.0    0.0       mulw	a0, a0, a0
+# CHECK-NEXT: 33.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 34.    1     23.0   0.0    0.0       beq	a0, a0, .Ltmp0
+# CHECK-NEXT: 35.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 36.    1     23.0   0.0    0.0       bne	a0, a0, .Ltmp1
+# CHECK-NEXT: 37.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 38.    1     23.0   0.0    0.0       blt	a0, a0, .Ltmp2
+# CHECK-NEXT: 39.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 40.    1     23.0   0.0    0.0       bltu	a0, a0, .Ltmp3
+# CHECK-NEXT: 41.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 42.    1     23.0   0.0    0.0       bge	a0, a0, .Ltmp4
+# CHECK-NEXT: 43.    1     22.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 44.    1     23.0   0.0    0.0       bgeu	a0, a0, .Ltmp5
+# CHECK-NEXT: 45.    1     22.0   0.0    0.0       add.uw	a0, a0, a0
+# CHECK-NEXT: 46.    1     22.0   0.0    0.0       slli.uw	a0, a0, 1
+# CHECK-NEXT: 47.    1     22.0   0.0    0.0       sh1add.uw	a0, a0, a0
+# CHECK-NEXT: 48.    1     22.0   0.0    0.0       sh2add.uw	a0, a0, a0
+# CHECK-NEXT: 49.    1     22.0   0.0    0.0       sh3add.uw	a0, a0, a0
+# CHECK-NEXT: 50.    1     22.0   0.0    0.0       sh1add	a0, a0, a0
+# CHECK-NEXT: 51.    1     22.0   0.0    0.0       sh2add	a0, a0, a0
+# CHECK-NEXT: 52.    1     22.0   0.0    0.0       sh3add	a0, a0, a0
+# CHECK-NEXT: 53.    1     22.0   0.0    0.0       andn	a0, a0, a0
+# CHECK-NEXT: 54.    1     19.0   0.0    0.0       orn	a0, a0, a0
+# CHECK-NEXT: 55.    1     16.0   0.0    0.0       xnor	a0, a0, a0
+# CHECK-NEXT: 56.    1     16.0   0.0    0.0       sext.b	a0, a0
+# CHECK-NEXT: 57.    1     16.0   0.0    0.0       sext.h	a0, a0
+# CHECK-NEXT: 58.    1     16.0   0.0    0.0       zext.h	a0, a0
+# CHECK-NEXT: 59.    1     16.0   0.0    0.0       min	a0, a0, a0
+# CHECK-NEXT: 60.    1     16.0   0.0    0.0       minu	a0, a0, a0
+# CHECK-NEXT: 61.    1     16.0   0.0    0.0       max	a0, a0, a0
+# CHECK-NEXT: 62.    1     16.0   0.0    0.0       maxu	a0, a0, a0
+# CHECK-NEXT: 63.    1     16.0   0.0    0.0       rol	a0, a0, a0
+# CHECK-NEXT: 64.    1     16.0   0.0    0.0       ror	a0, a0, a0
+# CHECK-NEXT: 65.    1     16.0   0.0    0.0       rori	a0, a0, 1
+# CHECK-NEXT: 66.    1     17.0   0.0    0.0       clz	a0, a0
+# CHECK-NEXT: 67.    1     20.0   0.0    0.0       clzw	a0, a0
+# CHECK-NEXT: 68.    1     23.0   0.0    0.0       ctz	a0, a0
+# CHECK-NEXT: 69.    1     26.0   0.0    0.0       ctzw	a0, a0
+# CHECK-NEXT: 70.    1     29.0   0.0    0.0       cpop	a0, a0
+# CHECK-NEXT: 71.    1     31.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 72.    1     32.0   0.0    0.0       cpopw	a0, a0
+# CHECK-NEXT: 73.    1     34.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 74.    1     34.0   0.0    0.0       rev8	a0, a0
+# CHECK-NEXT: 75.    1     34.0   0.0    0.0       orc.b	a0, a0
+# CHECK-NEXT: 76.    1     35.0   0.0    0.0       lb	a0, 0(a0)
+# CHECK-NEXT: 77.    1     38.0   0.0    0.0       add	a0, a0, a0
+# CHECK-NEXT: 78.    1     39.0   0.0    0.0       lh	a0, 0(a0)
+# CHECK-NEXT: 79.    1     42.0   0.0    0.0       and	a0, a0, a0
+# CHECK-NEXT: 80.    1     43.0   0.0    0.0       lw	a0, 0(a0)
+# CHECK-NEXT: 81.    1     46.0   0.0    0.0       or	a0, a0, a0
+# CHECK-NEXT: 82.    1     47.0   0.0    0.0       ld	a0, 0(a0)
+# CHECK-NEXT: 83.    1     50.0   0.0    0.0       xor	a0, a0, a0
+# CHECK-NEXT: 84.    1     51.0   0.0    0.0       lbu	a0, 0(a0)
+# CHECK-NEXT: 85.    1     54.0   0.0    0.0       addi	a0, a0, 1
+# CHECK-NEXT: 86.    1     55.0   0.0    0.0       lhu	a0, 0(a0)
+# CHECK-NEXT: 87.    1     58.0   0.0    0.0       sub	a0, a0, a0
+# CHECK-NEXT: 88.    1     59.0   0.0    0.0       lwu	a0, 0(a0)
+# CHECK-NEXT: 89.    1     62.0   0.0    0.0       addw	a0, a0, a0
+# CHECK-NEXT: 90.    1     63.0   0.0    0.0       jr	a0
+# CHECK-NEXT:        1     22.7   0.0    0.0       <total>
diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s
new file mode 100644
index 00000000000000..e1925e7647e33c
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu -timeline -iterations=1 < %s | FileCheck %s
+
+# Test XiangShan load to ALU (4 cycles)
+ld a1, 0(a0)
+addi a2, a1, 1
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      8
+# CHECK-NEXT: Total uOps:        2
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    0.25
+# CHECK-NEXT: IPC:               0.25
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      5     0.50    *                   ld	a1, 0(a0)
+# CHECK-NEXT:  1      1     0.25                        addi	a2, a1, 1
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - XS2ALU
+# CHECK-NEXT: [0.1] - XS2ALU
+# CHECK-NEXT: [0.2] - XS2ALU
+# CHECK-NEXT: [0.3] - XS2ALU
+# CHECK-NEXT: [1.0] - XS2FMAC
+# CHECK-NEXT: [1.1] - XS2FMAC
+# CHECK-NEXT: [1.2] - XS2FMAC
+# CHECK-NEXT: [1.3] - XS2FMAC
+# CHECK-NEXT: [2.0] - XS2FMISC
+# CHECK-NEXT: [2.1] - XS2FMISC
+# CHECK-NEXT: [3.0] - XS2LD
+# CHECK-NEXT: [3.1] - XS2LD
+# CHECK-NEXT: [4.0] - XS2MDU
+# CHECK-NEXT: [4.1] - XS2MDU
+# CHECK-NEXT: [5]   - XS2MISC
+# CHECK-NEXT: [6.0] - XS2ST
+# CHECK-NEXT: [6.1] - XS2ST
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -     1.00    -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [0.2]  [0.3]  [1.0]  [1.1]  [1.2]  [1.3]  [2.0]  [2.1]  [3.0]  [3.1]  [4.0]  [4.1]  [5]    [6.0]  [6.1]  Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     ld	a1, 0(a0)
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     addi	a2, a1, 1
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     01234567
+
+# CHECK:      [0,0]     DeeeeeER   ld	a1, 0(a0)
+# CHECK-NEXT: [0,1]     D====eER   addi	a2, a1, 1
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       ld	a1, 0(a0)
+# CHECK-NEXT: 1.     1     5.0    0.0    0.0       addi	a2, a1, 1
+# CHECK-NEXT:        1     3.0    0.5    0.0       <total>

>From a57f178f73606558df4dd9ad8e2c9ddff584cd4b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 8 Nov 2023 13:12:43 +0800
Subject: [PATCH 2/6] fixup! [RISCV] Add sched model for XiangShan-NanHu

Add unsupported features.
---
 llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index 24b5922a23df14..3b81a696720478 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -24,7 +24,8 @@ def XiangShanNanHuModel : SchedMachineModel {
   let MispredictPenalty = 11; // Based on estimate of pipeline depth.
   let CompleteModel = 0;
   let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
-  let UnsupportedFeatures = [];
+  let UnsupportedFeatures = [HasStdExtZcmt, HasStdExtZkr, HasVInstructions,
+                             HasVInstructionsI64];
 }
 
 let SchedModel = XiangShanNanHuModel in {

>From 53c4c197d4cea50e2ace0878f5fa6a966bb6febb Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 17 Nov 2023 15:08:04 +0800
Subject: [PATCH 3/6] fixup! [RISCV] Add sched model for XiangShan-NanHu

Fix latency of zbs instructions
---
 .../lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index 3b81a696720478..02874fc1096fd0 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -91,6 +91,12 @@ def : WriteRes<WriteBREV8, [XS2ALU]>;
 def : WriteRes<WritePACK, [XS2ALU]>;
 def : WriteRes<WritePACK32, [XS2ALU]>;
 def : WriteRes<WriteZIP, [XS2ALU]>;
+
+// Zbs
+def : WriteRes<WriteSingleBit, [XS2ALU]>;
+def : WriteRes<WriteSingleBitImm, [XS2ALU]>;
+def : WriteRes<WriteBEXT, [XS2ALU]>;
+def : WriteRes<WriteBEXTI, [XS2ALU]>;
 }
 
 let Latency = 3 in {
@@ -102,12 +108,6 @@ def : WriteRes<WriteCTZ32, [XS2MDU]>;
 def : WriteRes<WriteCPOP, [XS2MDU]>;
 def : WriteRes<WriteCPOP32, [XS2MDU]>;
 
-// Zbs
-def : WriteRes<WriteSingleBit, [XS2MDU]>;
-def : WriteRes<WriteSingleBitImm, [XS2MDU]>;
-def : WriteRes<WriteBEXT, [XS2MDU]>;
-def : WriteRes<WriteBEXTI, [XS2MDU]>;
-
 // Zbkc
 def : WriteRes<WriteCLMUL, [XS2MDU]>;
 
@@ -289,8 +289,8 @@ def : XS2LoadToALUBypass<ReadREV8>;
 // Zbkc
 def : ReadAdvance<ReadCLMUL, 0>;
 // Zbs
-def : ReadAdvance<ReadSingleBit, 0>;
-def : ReadAdvance<ReadSingleBitImm, 0>;
+def : XS2LoadToALUBypass<ReadSingleBit>;
+def : XS2LoadToALUBypass<ReadSingleBitImm>;
 // Zbkb
 def : XS2LoadToALUBypass<ReadBREV8>;
 def : XS2LoadToALUBypass<ReadPACK>;

>From 9ae633301fd2d0582854d77f4e60c22eebfacd21 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 9 Feb 2024 11:54:56 +0800
Subject: [PATCH 4/6] [RISCV] Rebase on the top of
 89f87c387627150d342722b79c78cea2311cddf7

---
 llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index 02874fc1096fd0..a473391372e238 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -305,4 +305,5 @@ defm : UnsupportedSchedV;
 defm : UnsupportedSchedZfa;
 defm : UnsupportedSchedZfh;
 defm : UnsupportedSchedSFB;
+defm : UnsupportedSchedZabha;
 }

>From d0d8fda719835e1a09d0236f28ac23f7fb3fe560 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 11 Feb 2024 08:01:09 +0800
Subject: [PATCH 5/6] [RISCV] Tune features for XiangShan-NanHu

---
 llvm/lib/Target/RISCV/RISCVProcessors.td          | 6 +++++-
 llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 -
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index ec50668aaa1681..8c75df41f5e395 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -348,4 +348,8 @@ def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
                                            FeatureStdExtZksh,
                                            FeatureStdExtSvinval,
                                            FeatureStdExtZicbom,
-                                           FeatureStdExtZicboz]>;
+                                           FeatureStdExtZicboz],
+                                           [TuneNoDefaultUnroll,
+                                            TuneZExtHFusion,
+                                            TuneZExtWFusion,
+                                            TuneShiftedZExtWFusion]>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index a473391372e238..36d2898de8466b 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -23,7 +23,6 @@ def XiangShanNanHuModel : SchedMachineModel {
   let LoadLatency = 4;
   let MispredictPenalty = 11; // Based on estimate of pipeline depth.
   let CompleteModel = 0;
-  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
   let UnsupportedFeatures = [HasStdExtZcmt, HasStdExtZkr, HasVInstructions,
                              HasVInstructionsI64];
 }

>From a95ae0bc22c145f25f19142d59b5fbca3fe3ba24 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 11 Feb 2024 15:15:09 +0800
Subject: [PATCH 6/6] [RISCV] Fix header comments.

---
 llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index 36d2898de8466b..667b5983cb401c 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -1,12 +1,12 @@
-//==- RISCVSchedXiangShanNanHu.td - XiangShan-NanHu Scheduling Definitions --*- tablegen -*-=//
+//==- RISCVSchedXiangShanNanHu.td - XS-NanHu Scheduling Defs -*- tablegen -*-=//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===-------------------------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 
-//===-------------------------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 
 // XiangShan is a high-performance open-source RISC-V processor developed by
 // the Institute of Computing Technology (ICT), Chinese Academy of Sciences.



More information about the llvm-commits mailing list