[llvm] [RISCV] Add sched model for XiangShan-NanHu (PR #70232)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 10 23:15:36 PST 2024
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/70232
>From ea3e3d5d3fc612e9650b4038cee4dffcb2182c5f Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 28 Oct 2023 20:46:37 +0800
Subject: [PATCH 1/6] [RISCV] Add sched model for XiangShan-NanHu
Co-authored-by: SForeKeeper <zkliu6 at gmail.com>
---
llvm/lib/Target/RISCV/RISCV.td | 1 +
llvm/lib/Target/RISCV/RISCVProcessors.td | 2 +-
.../Target/RISCV/RISCVSchedXiangShanNanHu.td | 307 ++++++++++
.../llvm-mca/RISCV/XiangShan/cascade-fma.s | 53 ++
.../llvm-mca/RISCV/XiangShan/gpr-bypass.s | 527 ++++++++++++++++++
.../llvm-mca/RISCV/XiangShan/load-to-alu.s | 73 +++
6 files changed, 962 insertions(+), 1 deletion(-)
create mode 100644 llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
create mode 100644 llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s
create mode 100644 llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s
create mode 100644 llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 27d52c16a4f39d..575bd4c9d3561d 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -44,6 +44,7 @@ include "RISCVSchedRocket.td"
include "RISCVSchedSiFive7.td"
include "RISCVSchedSiFiveP400.td"
include "RISCVSchedSyntacoreSCR1.td"
+include "RISCVSchedXiangShanNanHu.td"
//===----------------------------------------------------------------------===//
// RISC-V processors supported.
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 59bb811058d488..ec50668aaa1681 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -330,7 +330,7 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
TuneLDADDFusion]>;
def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
- NoSchedModel,
+ XiangShanNanHuModel,
[Feature64Bit,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
new file mode 100644
index 00000000000000..24b5922a23df14
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -0,0 +1,307 @@
+//==- RISCVSchedXiangShanNanHu.td - XiangShan-NanHu Scheduling Definitions --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-------------------------------------------------------------------------------------===//
+
+//===-------------------------------------------------------------------------------------===//
+
+// XiangShan is a high-performance open-source RISC-V processor developed by
+// the Institute of Computing Technology (ICT), Chinese Academy of Sciences.
+// Source: https://github.com/OpenXiangShan/XiangShan
+// Documentation: https://github.com/OpenXiangShan/XiangShan-doc
+
+// XiangShan-NanHu is the second generation of the XiangShan processor series.
+// Overview: https://xiangshan-doc.readthedocs.io/zh-cn/latest/integration/overview/
+
+def XiangShanNanHuModel : SchedMachineModel {
+ let MicroOpBufferSize = 256;
+ let LoopMicroOpBufferSize = 48; // Instruction queue size
+ let IssueWidth = 6; // 6-way decode and dispatch
+ let LoadLatency = 4;
+ let MispredictPenalty = 11; // Based on estimate of pipeline depth.
+ let CompleteModel = 0;
+ let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
+ let UnsupportedFeatures = [];
+}
+
+let SchedModel = XiangShanNanHuModel in {
+
+// The reservation stations are distributed and split into smaller 32-entry or 16-entry stations.
+let BufferSize = 16 in {
+ def XS2ALU : ProcResource<4>;
+ def XS2MDU : ProcResource<2>;
+ def XS2MISC : ProcResource<1>;
+
+ def XS2FMAC : ProcResource<4>;
+ def XS2FMISC : ProcResource<2>;
+
+ // Load/Store queues are ignored.
+ def XS2LD : ProcResource<2>;
+ def XS2ST : ProcResource<2>;
+}
+
+// Branching
+def : WriteRes<WriteJmp, [XS2MISC]>;
+def : WriteRes<WriteJal, [XS2MISC]>;
+def : WriteRes<WriteJalr, [XS2MISC]>;
+
+// Integer arithmetic and logic
+let Latency = 1 in {
+def : WriteRes<WriteIALU, [XS2ALU]>;
+def : WriteRes<WriteIALU32, [XS2ALU]>;
+def : WriteRes<WriteShiftImm, [XS2ALU]>;
+def : WriteRes<WriteShiftImm32, [XS2ALU]>;
+def : WriteRes<WriteShiftReg, [XS2ALU]>;
+def : WriteRes<WriteShiftReg32, [XS2ALU]>;
+}
+
+// Integer multiplication
+let Latency = 3 in {
+def : WriteRes<WriteIMul, [XS2MDU]>;
+def : WriteRes<WriteIMul32, [XS2MDU]>;
+}
+
+// Integer division
+// SRT16 algorithm
+let Latency = 20, ReleaseAtCycles = [20] in {
+def : WriteRes<WriteIDiv32, [XS2MDU]>;
+def : WriteRes<WriteIDiv, [XS2MDU]>;
+}
+
+// Zb*
+let Latency = 1 in {
+// Zba
+def : WriteRes<WriteSHXADD, [XS2ALU]>;
+def : WriteRes<WriteSHXADD32, [XS2ALU]>;
+
+// Zbb
+def : WriteRes<WriteRotateImm, [XS2ALU]>;
+def : WriteRes<WriteRotateImm32, [XS2ALU]>;
+def : WriteRes<WriteRotateReg, [XS2ALU]>;
+def : WriteRes<WriteRotateReg32, [XS2ALU]>;
+def : WriteRes<WriteORCB, [XS2ALU]>;
+def : WriteRes<WriteREV8, [XS2ALU]>;
+
+// Zbkb
+def : WriteRes<WriteBREV8, [XS2ALU]>;
+def : WriteRes<WritePACK, [XS2ALU]>;
+def : WriteRes<WritePACK32, [XS2ALU]>;
+def : WriteRes<WriteZIP, [XS2ALU]>;
+}
+
+let Latency = 3 in {
+// Zbb
+def : WriteRes<WriteCLZ, [XS2MDU]>;
+def : WriteRes<WriteCLZ32, [XS2MDU]>;
+def : WriteRes<WriteCTZ, [XS2MDU]>;
+def : WriteRes<WriteCTZ32, [XS2MDU]>;
+def : WriteRes<WriteCPOP, [XS2MDU]>;
+def : WriteRes<WriteCPOP32, [XS2MDU]>;
+
+// Zbs
+def : WriteRes<WriteSingleBit, [XS2MDU]>;
+def : WriteRes<WriteSingleBitImm, [XS2MDU]>;
+def : WriteRes<WriteBEXT, [XS2MDU]>;
+def : WriteRes<WriteBEXTI, [XS2MDU]>;
+
+// Zbkc
+def : WriteRes<WriteCLMUL, [XS2MDU]>;
+
+// Zbkx
+def : WriteRes<WriteXPERM, [XS2MDU]>;
+}
+
+// Memory
+def : WriteRes<WriteSTB, [XS2ST]>;
+def : WriteRes<WriteSTH, [XS2ST]>;
+def : WriteRes<WriteSTW, [XS2ST]>;
+def : WriteRes<WriteSTD, [XS2ST]>;
+def : WriteRes<WriteFST32, [XS2ST]>;
+def : WriteRes<WriteFST64, [XS2ST]>;
+def : WriteRes<WriteAtomicSTW, [XS2ST]>;
+def : WriteRes<WriteAtomicSTD, [XS2ST]>;
+
+let Latency = 5 in {
+def : WriteRes<WriteLDB, [XS2LD]>;
+def : WriteRes<WriteLDH, [XS2LD]>;
+def : WriteRes<WriteLDW, [XS2LD]>;
+def : WriteRes<WriteLDD, [XS2LD]>;
+
+def : WriteRes<WriteAtomicW, [XS2LD]>;
+def : WriteRes<WriteAtomicD, [XS2LD]>;
+def : WriteRes<WriteAtomicLDW, [XS2LD]>;
+def : WriteRes<WriteAtomicLDD, [XS2LD]>;
+
+def : WriteRes<WriteFLD32, [XS2LD]>;
+def : WriteRes<WriteFLD64, [XS2LD]>;
+}
+
+// XiangShan-NanHu uses FuDian FPU instead of Berkeley HardFloat.
+// Documentation: https://github.com/OpenXiangShan/fudian
+
+let Latency = 3 in {
+def : WriteRes<WriteFAdd32, [XS2FMAC]>;
+def : WriteRes<WriteFSGNJ32, [XS2FMAC]>;
+def : WriteRes<WriteFMinMax32, [XS2FMAC]>;
+def : WriteRes<WriteFAdd64, [XS2FMAC]>;
+def : WriteRes<WriteFSGNJ64, [XS2FMAC]>;
+def : WriteRes<WriteFMinMax64, [XS2FMAC]>;
+
+def : WriteRes<WriteFCvtI32ToF32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtI32ToF64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtI64ToF32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtI64ToF64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF32ToI32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF32ToI64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF64ToI32, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF64ToI64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF32ToF64, [XS2FMAC]>;
+def : WriteRes<WriteFCvtF64ToF32, [XS2FMAC]>;
+
+def : WriteRes<WriteFClass32, [XS2FMAC]>;
+def : WriteRes<WriteFClass64, [XS2FMAC]>;
+def : WriteRes<WriteFCmp32, [XS2FMAC]>;
+def : WriteRes<WriteFCmp64, [XS2FMAC]>;
+def : WriteRes<WriteFMovF32ToI32, [XS2FMAC]>;
+def : WriteRes<WriteFMovI32ToF32, [XS2FMAC]>;
+def : WriteRes<WriteFMovF64ToI64, [XS2FMAC]>;
+def : WriteRes<WriteFMovI64ToF64, [XS2FMAC]>;
+}
+
+// FP multiplication
+let Latency = 3 in {
+def : WriteRes<WriteFMul32, [XS2FMAC]>;
+def : WriteRes<WriteFMul64, [XS2FMAC]>;
+}
+
+let Latency = 5 in {
+def : WriteRes<WriteFMA32, [XS2FMAC]>;
+def : WriteRes<WriteFMA64, [XS2FMAC]>;
+}
+
+// FP division and square root
+def : WriteRes<WriteFDiv32, [XS2FMISC]> {
+ let Latency = 11;
+}
+def : WriteRes<WriteFDiv64, [XS2FMISC]> {
+ let Latency = 18;
+}
+
+def : WriteRes<WriteFSqrt32, [XS2FMISC]> {
+ let Latency = 17;
+}
+def : WriteRes<WriteFSqrt64, [XS2FMISC]> {
+ let Latency = 31;
+}
+
+// Others
+def : WriteRes<WriteCSR, [XS2MISC]>;
+def : WriteRes<WriteNop, []>;
+
+def : InstRW<[WriteIALU], (instrs COPY)>;
+
+// Bypass and advance
+
+class XS2LoadToALUBypass<SchedRead read>
+ : ReadAdvance<read, 1, [WriteLDB, WriteLDH, WriteLDW, WriteLDD, WriteAtomicW, WriteAtomicD, WriteAtomicLDW, WriteAtomicLDD]>;
+
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : XS2LoadToALUBypass<ReadIALU>;
+def : XS2LoadToALUBypass<ReadIALU32>;
+def : XS2LoadToALUBypass<ReadShiftImm>;
+def : XS2LoadToALUBypass<ReadShiftImm32>;
+def : XS2LoadToALUBypass<ReadShiftReg>;
+def : XS2LoadToALUBypass<ReadShiftReg32>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFStoreData, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
+def : ReadAdvance<ReadFAdd32, 0>;
+def : ReadAdvance<ReadFAdd64, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 2>; // Cascade FMA
+def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 2>; // Cascade FMA
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+
+// Zb*
+// Zba
+def : XS2LoadToALUBypass<ReadSHXADD>;
+def : XS2LoadToALUBypass<ReadSHXADD32>;
+// Zbb
+def : XS2LoadToALUBypass<ReadRotateImm>;
+def : XS2LoadToALUBypass<ReadRotateImm32>;
+def : XS2LoadToALUBypass<ReadRotateReg>;
+def : XS2LoadToALUBypass<ReadRotateReg32>;
+def : ReadAdvance<ReadCLZ, 0>;
+def : ReadAdvance<ReadCLZ32, 0>;
+def : ReadAdvance<ReadCTZ, 0>;
+def : ReadAdvance<ReadCTZ32, 0>;
+def : ReadAdvance<ReadCPOP, 0>;
+def : ReadAdvance<ReadCPOP32, 0>;
+def : XS2LoadToALUBypass<ReadORCB>;
+def : XS2LoadToALUBypass<ReadREV8>;
+// Zbkc
+def : ReadAdvance<ReadCLMUL, 0>;
+// Zbs
+def : ReadAdvance<ReadSingleBit, 0>;
+def : ReadAdvance<ReadSingleBitImm, 0>;
+// Zbkb
+def : XS2LoadToALUBypass<ReadBREV8>;
+def : XS2LoadToALUBypass<ReadPACK>;
+def : XS2LoadToALUBypass<ReadPACK32>;
+def : XS2LoadToALUBypass<ReadZIP>;
+// Zbkx
+def : ReadAdvance<ReadXPERM, 0>;
+
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedV;
+defm : UnsupportedSchedZfa;
+defm : UnsupportedSchedZfh;
+defm : UnsupportedSchedSFB;
+}
diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s
new file mode 100644
index 00000000000000..d44eb55ebf7598
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/cascade-fma.s
@@ -0,0 +1,53 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu < %s | FileCheck %s
+
+# Test XiangShan FuDian's cascade FMA: FMA latency is 5 and the addend read is advanced by 2, so CPI = 3
+fmadd.s fa0, fa1, fa2, fa0
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 100
+# CHECK-NEXT: Total Cycles: 305
+# CHECK-NEXT: Total uOps: 100
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.25 fmadd.s fa0, fa1, fa2, fa0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - XS2ALU
+# CHECK-NEXT: [0.1] - XS2ALU
+# CHECK-NEXT: [0.2] - XS2ALU
+# CHECK-NEXT: [0.3] - XS2ALU
+# CHECK-NEXT: [1.0] - XS2FMAC
+# CHECK-NEXT: [1.1] - XS2FMAC
+# CHECK-NEXT: [1.2] - XS2FMAC
+# CHECK-NEXT: [1.3] - XS2FMAC
+# CHECK-NEXT: [2.0] - XS2FMISC
+# CHECK-NEXT: [2.1] - XS2FMISC
+# CHECK-NEXT: [3.0] - XS2LD
+# CHECK-NEXT: [3.1] - XS2LD
+# CHECK-NEXT: [4.0] - XS2MDU
+# CHECK-NEXT: [4.1] - XS2MDU
+# CHECK-NEXT: [5] - XS2MISC
+# CHECK-NEXT: [6.0] - XS2ST
+# CHECK-NEXT: [6.1] - XS2ST
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - - 0.25 0.25 0.25 0.25 - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - fmadd.s fa0, fa1, fa2, fa0
diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s
new file mode 100644
index 00000000000000..677fece1535a04
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/gpr-bypass.s
@@ -0,0 +1,527 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu -timeline \
+# RUN: -timeline-max-cycles=1000 -iterations=1 < %s | FileCheck %s
+
+lui a0, 1
+auipc a1, 1
+add a0, a0, a1
+addi a0, a0, 1
+addw a0, a0, a0
+addiw a0, a0, 1
+sub a0, a0, a0
+subw a0, a0, a0
+and a0, a0, a0
+andi a0, a0, 1
+or a0, a0, a0
+ori a0, a0, 1
+xor a0, a0, a0
+xori a0, a0, 1
+sll a0, a0, a0
+slli a0, a0, 1
+sllw a0, a0, a0
+slliw a0, a0, 1
+srl a0, a0, a0
+srli a0, a0, 1
+srlw a0, a0, a0
+srliw a0, a0, 1
+sra a0, a0, a0
+srai a0, a0, 1
+sraw a0, a0, a0
+sraiw a0, a0, 1
+slt a0, a0, a0
+slti a0, a0, 1
+sltu a0, a0, a0
+sltiu a0, a0, 1
+mul a0, a0, a0
+add a0, a0, a0
+mulw a0, a0, a0
+add a0, a0, a0
+beq a0, a0, 1f
+1:
+add a0, a0, a0
+bne a0, a0, 1f
+1:
+add a0, a0, a0
+blt a0, a0, 1f
+1:
+add a0, a0, a0
+bltu a0, a0, 1f
+1:
+add a0, a0, a0
+bge a0, a0, 1f
+1:
+add a0, a0, a0
+bgeu a0, a0, 1f
+1:
+# zba
+add.uw a0, a0, a0
+slli.uw a0, a0, 1
+sh1add.uw a0, a0, a0
+sh2add.uw a0, a0, a0
+sh3add.uw a0, a0, a0
+sh1add a0, a0, a0
+sh2add a0, a0, a0
+sh3add a0, a0, a0
+# zbb
+andn a0, a0, a0
+orn a0, a0, a0
+xnor a0, a0, a0
+sext.b a0, a0
+sext.h a0, a0
+zext.h a0, a0
+min a0, a0, a0
+minu a0, a0, a0
+max a0, a0, a0
+maxu a0, a0, a0
+rol a0, a0, a0
+ror a0, a0, a0
+rori a0, a0, 1
+clz a0, a0
+clzw a0, a0
+ctz a0, a0
+ctzw a0, a0
+cpop a0, a0
+add a0, a0, a0
+cpopw a0, a0
+add a0, a0, a0
+rev8 a0, a0
+orc.b a0, a0
+lb a0, 0(a0)
+add a0, a0, a0
+lh a0, 0(a0)
+and a0, a0, a0
+lw a0, 0(a0)
+or a0, a0, a0
+ld a0, 0(a0)
+xor a0, a0, a0
+lbu a0, 0(a0)
+addi a0, a0, 1
+lhu a0, 0(a0)
+sub a0, a0, a0
+lwu a0, 0(a0)
+addw a0, a0, a0
+jr a0
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 91
+# CHECK-NEXT: Total Cycles: 124
+# CHECK-NEXT: Total uOps: 91
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 0.73
+# CHECK-NEXT: IPC: 0.73
+# CHECK-NEXT: Block RThroughput: 17.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.25 lui a0, 1
+# CHECK-NEXT: 1 1 0.25 auipc a1, 1
+# CHECK-NEXT: 1 1 0.25 add a0, a0, a1
+# CHECK-NEXT: 1 1 0.25 addi a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 addw a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 addiw a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 sub a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 subw a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 and a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 andi a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 or a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 ori a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 xor a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 xori a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 sll a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 slli a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 sllw a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 slliw a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 srl a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 srli a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 srlw a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 srliw a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 sra a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 srai a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 sraw a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 sraiw a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 slt a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 slti a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 sltu a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 seqz a0, a0
+# CHECK-NEXT: 1 3 0.50 mul a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 add a0, a0, a0
+# CHECK-NEXT: 1 3 0.50 mulw a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 add a0, a0, a0
+# CHECK-NEXT: 1 1 1.00 beq a0, a0, .Ltmp0
+# CHECK-NEXT: 1 1 0.25 add a0, a0, a0
+# CHECK-NEXT: 1 1 1.00 bne a0, a0, .Ltmp1
+# CHECK-NEXT: 1 1 0.25 add a0, a0, a0
+# CHECK-NEXT: 1 1 1.00 blt a0, a0, .Ltmp2
+# CHECK-NEXT: 1 1 0.25 add a0, a0, a0
+# CHECK-NEXT: 1 1 1.00 bltu a0, a0, .Ltmp3
+# CHECK-NEXT: 1 1 0.25 add a0, a0, a0
+# CHECK-NEXT: 1 1 1.00 bge a0, a0, .Ltmp4
+# CHECK-NEXT: 1 1 0.25 add a0, a0, a0
+# CHECK-NEXT: 1 1 1.00 bgeu a0, a0, .Ltmp5
+# CHECK-NEXT: 1 1 0.25 add.uw a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 slli.uw a0, a0, 1
+# CHECK-NEXT: 1 1 0.25 sh1add.uw a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 sh2add.uw a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 sh3add.uw a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 sh1add a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 sh2add a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 sh3add a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 andn a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 orn a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 xnor a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 sext.b a0, a0
+# CHECK-NEXT: 1 1 0.25 sext.h a0, a0
+# CHECK-NEXT: 1 1 0.25 zext.h a0, a0
+# CHECK-NEXT: 1 1 0.25 min a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 minu a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 max a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 maxu a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 rol a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 ror a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 rori a0, a0, 1
+# CHECK-NEXT: 1 3 0.50 clz a0, a0
+# CHECK-NEXT: 1 3 0.50 clzw a0, a0
+# CHECK-NEXT: 1 3 0.50 ctz a0, a0
+# CHECK-NEXT: 1 3 0.50 ctzw a0, a0
+# CHECK-NEXT: 1 3 0.50 cpop a0, a0
+# CHECK-NEXT: 1 1 0.25 add a0, a0, a0
+# CHECK-NEXT: 1 3 0.50 cpopw a0, a0
+# CHECK-NEXT: 1 1 0.25 add a0, a0, a0
+# CHECK-NEXT: 1 1 0.25 rev8 a0, a0
+# CHECK-NEXT: 1 1 0.25 orc.b a0, a0
+# CHECK-NEXT: 1 5 0.50 * lb a0, 0(a0)
+# CHECK-NEXT: 1 1 0.25 add a0, a0, a0
+# CHECK-NEXT: 1 5 0.50 * lh a0, 0(a0)
+# CHECK-NEXT: 1 1 0.25 and a0, a0, a0
+# CHECK-NEXT: 1 5 0.50 * lw a0, 0(a0)
+# CHECK-NEXT: 1 1 0.25 or a0, a0, a0
+# CHECK-NEXT: 1 5 0.50 * ld a0, 0(a0)
+# CHECK-NEXT: 1 1 0.25 xor a0, a0, a0
+# CHECK-NEXT: 1 5 0.50 * lbu a0, 0(a0)
+# CHECK-NEXT: 1 1 0.25 addi a0, a0, 1
+# CHECK-NEXT: 1 5 0.50 * lhu a0, 0(a0)
+# CHECK-NEXT: 1 1 0.25 sub a0, a0, a0
+# CHECK-NEXT: 1 5 0.50 * lwu a0, 0(a0)
+# CHECK-NEXT: 1 1 0.25 addw a0, a0, a0
+# CHECK-NEXT: 1 1 1.00 jr a0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - XS2ALU
+# CHECK-NEXT: [0.1] - XS2ALU
+# CHECK-NEXT: [0.2] - XS2ALU
+# CHECK-NEXT: [0.3] - XS2ALU
+# CHECK-NEXT: [1.0] - XS2FMAC
+# CHECK-NEXT: [1.1] - XS2FMAC
+# CHECK-NEXT: [1.2] - XS2FMAC
+# CHECK-NEXT: [1.3] - XS2FMAC
+# CHECK-NEXT: [2.0] - XS2FMISC
+# CHECK-NEXT: [2.1] - XS2FMISC
+# CHECK-NEXT: [3.0] - XS2LD
+# CHECK-NEXT: [3.1] - XS2LD
+# CHECK-NEXT: [4.0] - XS2MDU
+# CHECK-NEXT: [4.1] - XS2MDU
+# CHECK-NEXT: [5] - XS2MISC
+# CHECK-NEXT: [6.0] - XS2ST
+# CHECK-NEXT: [6.1] - XS2ST
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1]
+# CHECK-NEXT: 17.00 17.00 17.00 18.00 - - - - - - 3.00 4.00 4.00 4.00 7.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - lui a0, 1
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - auipc a1, 1
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add a0, a0, a1
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - addi a0, a0, 1
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - addw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - addiw a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - sub a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - subw a0, a0, a0
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - and a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - andi a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - or a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - ori a0, a0, 1
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - xor a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - xori a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - sll a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - slli a0, a0, 1
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sllw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - slliw a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - srl a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - srli a0, a0, 1
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - srlw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - srliw a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - sra a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - srai a0, a0, 1
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sraw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - sraiw a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - slt a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - slti a0, a0, 1
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sltu a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - seqz a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - - - mul a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - mulw a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - add a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - beq a0, a0, .Ltmp0
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - add a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - bne a0, a0, .Ltmp1
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - add a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - blt a0, a0, .Ltmp2
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - bltu a0, a0, .Ltmp3
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - add a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - bge a0, a0, .Ltmp4
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - add a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - bgeu a0, a0, .Ltmp5
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - add.uw a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - slli.uw a0, a0, 1
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - sh1add.uw a0, a0, a0
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sh2add.uw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - sh3add.uw a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - sh1add a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - sh2add a0, a0, a0
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sh3add a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - andn a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - orn a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - xnor a0, a0, a0
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - sext.b a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - sext.h a0, a0
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - zext.h a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - min a0, a0, a0
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - minu a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - max a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - maxu a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - rol a0, a0, a0
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - ror a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - rori a0, a0, 1
+# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - - - clz a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - clzw a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - - - ctz a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - ctzw a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - - - cpop a0, a0
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - cpopw a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - add a0, a0, a0
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - rev8 a0, a0
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - orc.b a0, a0
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - lb a0, 0(a0)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - lh a0, 0(a0)
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - and a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - lw a0, 0(a0)
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - or a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - ld a0, 0(a0)
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - xor a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - lbu a0, 0(a0)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - addi a0, a0, 1
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - lhu a0, 0(a0)
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - sub a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - lwu a0, 0(a0)
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - addw a0, a0, a0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - jr a0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeER . . . . . . . . . . . . . . . . . . . . . . . . . lui a0, 1
+# CHECK-NEXT: [0,1] DeER . . . . . . . . . . . . . . . . . . . . . . . . . auipc a1, 1
+# CHECK-NEXT: [0,2] D=eER. . . . . . . . . . . . . . . . . . . . . . . . . add a0, a0, a1
+# CHECK-NEXT: [0,3] D==eER . . . . . . . . . . . . . . . . . . . . . . . . addi a0, a0, 1
+# CHECK-NEXT: [0,4] D===eER . . . . . . . . . . . . . . . . . . . . . . . . addw a0, a0, a0
+# CHECK-NEXT: [0,5] D====eER . . . . . . . . . . . . . . . . . . . . . . . . addiw a0, a0, 1
+# CHECK-NEXT: [0,6] .D====eER . . . . . . . . . . . . . . . . . . . . . . . . sub a0, a0, a0
+# CHECK-NEXT: [0,7] .D=====eER. . . . . . . . . . . . . . . . . . . . . . . . subw a0, a0, a0
+# CHECK-NEXT: [0,8] .D======eER . . . . . . . . . . . . . . . . . . . . . . . and a0, a0, a0
+# CHECK-NEXT: [0,9] .D=======eER . . . . . . . . . . . . . . . . . . . . . . . andi a0, a0, 1
+# CHECK-NEXT: [0,10] .D========eER . . . . . . . . . . . . . . . . . . . . . . . or a0, a0, a0
+# CHECK-NEXT: [0,11] .D=========eER . . . . . . . . . . . . . . . . . . . . . . . ori a0, a0, 1
+# CHECK-NEXT: [0,12] . D=========eER. . . . . . . . . . . . . . . . . . . . . . . xor a0, a0, a0
+# CHECK-NEXT: [0,13] . D==========eER . . . . . . . . . . . . . . . . . . . . . . xori a0, a0, 1
+# CHECK-NEXT: [0,14] . D===========eER . . . . . . . . . . . . . . . . . . . . . . sll a0, a0, a0
+# CHECK-NEXT: [0,15] . D============eER . . . . . . . . . . . . . . . . . . . . . . slli a0, a0, 1
+# CHECK-NEXT: [0,16] . D=============eER . . . . . . . . . . . . . . . . . . . . . . sllw a0, a0, a0
+# CHECK-NEXT: [0,17] . D==============eER. . . . . . . . . . . . . . . . . . . . . . slliw a0, a0, 1
+# CHECK-NEXT: [0,18] . D==============eER . . . . . . . . . . . . . . . . . . . . . srl a0, a0, a0
+# CHECK-NEXT: [0,19] . D===============eER . . . . . . . . . . . . . . . . . . . . . srli a0, a0, 1
+# CHECK-NEXT: [0,20] . D===============eER . . . . . . . . . . . . . . . . . . . . . srlw a0, a0, a0
+# CHECK-NEXT: [0,21] . D===============eER . . . . . . . . . . . . . . . . . . . . . srliw a0, a0, 1
+# CHECK-NEXT: [0,22] . .D===============eER. . . . . . . . . . . . . . . . . . . . . sra a0, a0, a0
+# CHECK-NEXT: [0,23] . . D===============eER . . . . . . . . . . . . . . . . . . . . srai a0, a0, 1
+# CHECK-NEXT: [0,24] . . D===============eER . . . . . . . . . . . . . . . . . . . . sraw a0, a0, a0
+# CHECK-NEXT: [0,25] . . D===============eER . . . . . . . . . . . . . . . . . . . . sraiw a0, a0, 1
+# CHECK-NEXT: [0,26] . . D===============eER . . . . . . . . . . . . . . . . . . . . slt a0, a0, a0
+# CHECK-NEXT: [0,27] . . .D===============eER. . . . . . . . . . . . . . . . . . . . slti a0, a0, 1
+# CHECK-NEXT: [0,28] . . . D===============eER . . . . . . . . . . . . . . . . . . . sltu a0, a0, a0
+# CHECK-NEXT: [0,29] . . . D===============eER . . . . . . . . . . . . . . . . . . . seqz a0, a0
+# CHECK-NEXT: [0,30] . . . D================eeeER. . . . . . . . . . . . . . . . . . . mul a0, a0, a0
+# CHECK-NEXT: [0,31] . . . D==================eER . . . . . . . . . . . . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,32] . . . D===================eeeER . . . . . . . . . . . . . . . . . . mulw a0, a0, a0
+# CHECK-NEXT: [0,33] . . . D=====================eER. . . . . . . . . . . . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,34] . . . D======================eER . . . . . . . . . . . . . . . . . beq a0, a0, .Ltmp0
+# CHECK-NEXT: [0,35] . . . .D=====================eER . . . . . . . . . . . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,36] . . . .D======================eER . . . . . . . . . . . . . . . . . bne a0, a0, .Ltmp1
+# CHECK-NEXT: [0,37] . . . . D=====================eER . . . . . . . . . . . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,38] . . . . D======================eER . . . . . . . . . . . . . . . . . blt a0, a0, .Ltmp2
+# CHECK-NEXT: [0,39] . . . . D=====================eER . . . . . . . . . . . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,40] . . . . D======================eER . . . . . . . . . . . . . . . . . bltu a0, a0, .Ltmp3
+# CHECK-NEXT: [0,41] . . . . D=====================eER . . . . . . . . . . . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,42] . . . . D======================eER. . . . . . . . . . . . . . . . . bge a0, a0, .Ltmp4
+# CHECK-NEXT: [0,43] . . . . D=====================eER. . . . . . . . . . . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,44] . . . . D======================eER . . . . . . . . . . . . . . . . bgeu a0, a0, .Ltmp5
+# CHECK-NEXT: [0,45] . . . . .D=====================eER . . . . . . . . . . . . . . . . add.uw a0, a0, a0
+# CHECK-NEXT: [0,46] . . . . . D=====================eER . . . . . . . . . . . . . . . . slli.uw a0, a0, 1
+# CHECK-NEXT: [0,47] . . . . . D=====================eER . . . . . . . . . . . . . . . . sh1add.uw a0, a0, a0
+# CHECK-NEXT: [0,48] . . . . . D=====================eER . . . . . . . . . . . . . . . . sh2add.uw a0, a0, a0
+# CHECK-NEXT: [0,49] . . . . . D=====================eER. . . . . . . . . . . . . . . . sh3add.uw a0, a0, a0
+# CHECK-NEXT: [0,50] . . . . . .D=====================eER . . . . . . . . . . . . . . . sh1add a0, a0, a0
+# CHECK-NEXT: [0,51] . . . . . . D=====================eER . . . . . . . . . . . . . . . sh2add a0, a0, a0
+# CHECK-NEXT: [0,52] . . . . . . D=====================eER . . . . . . . . . . . . . . . sh3add a0, a0, a0
+# CHECK-NEXT: [0,53] . . . . . . D=====================eER . . . . . . . . . . . . . . . andn a0, a0, a0
+# CHECK-NEXT: [0,54] . . . . . . . D==================eER. . . . . . . . . . . . . . . orn a0, a0, a0
+# CHECK-NEXT: [0,55] . . . . . . . . D===============eER . . . . . . . . . . . . . . xnor a0, a0, a0
+# CHECK-NEXT: [0,56] . . . . . . . . D===============eER . . . . . . . . . . . . . . sext.b a0, a0
+# CHECK-NEXT: [0,57] . . . . . . . . D===============eER . . . . . . . . . . . . . . sext.h a0, a0
+# CHECK-NEXT: [0,58] . . . . . . . . D===============eER . . . . . . . . . . . . . . zext.h a0, a0
+# CHECK-NEXT: [0,59] . . . . . . . . .D===============eER. . . . . . . . . . . . . . min a0, a0, a0
+# CHECK-NEXT: [0,60] . . . . . . . . . D===============eER . . . . . . . . . . . . . minu a0, a0, a0
+# CHECK-NEXT: [0,61] . . . . . . . . . D===============eER . . . . . . . . . . . . . max a0, a0, a0
+# CHECK-NEXT: [0,62] . . . . . . . . . D===============eER . . . . . . . . . . . . . maxu a0, a0, a0
+# CHECK-NEXT: [0,63] . . . . . . . . . D===============eER . . . . . . . . . . . . . rol a0, a0, a0
+# CHECK-NEXT: [0,64] . . . . . . . . . .D===============eER. . . . . . . . . . . . . ror a0, a0, a0
+# CHECK-NEXT: [0,65] . . . . . . . . . . D===============eER . . . . . . . . . . . . rori a0, a0, 1
+# CHECK-NEXT: [0,66] . . . . . . . . . . D================eeeER . . . . . . . . . . . . clz a0, a0
+# CHECK-NEXT: [0,67] . . . . . . . . . . D===================eeeER . . . . . . . . . . . clzw a0, a0
+# CHECK-NEXT: [0,68] . . . . . . . . . . D======================eeeER. . . . . . . . . . . ctz a0, a0
+# CHECK-NEXT: [0,69] . . . . . . . . . . D=========================eeeER . . . . . . . . . . ctzw a0, a0
+# CHECK-NEXT: [0,70] . . . . . . . . . . D============================eeeER . . . . . . . . . cpop a0, a0
+# CHECK-NEXT: [0,71] . . . . . . . . . . D==============================eER . . . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,72] . . . . . . . . . . D===============================eeeER. . . . . . . . . cpopw a0, a0
+# CHECK-NEXT: [0,73] . . . . . . . . . . D=================================eER . . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,74] . . . . . . . . . . D=================================eER . . . . . . . . rev8 a0, a0
+# CHECK-NEXT: [0,75] . . . . . . . . . . .D=================================eER . . . . . . . . orc.b a0, a0
+# CHECK-NEXT: [0,76] . . . . . . . . . . .D==================================eeeeeER . . . . . . . lb a0, 0(a0)
+# CHECK-NEXT: [0,77] . . . . . . . . . . . D=====================================eER . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,78] . . . . . . . . . . . D======================================eeeeeER . . . . . . lh a0, 0(a0)
+# CHECK-NEXT: [0,79] . . . . . . . . . . . D=========================================eER . . . . . . and a0, a0, a0
+# CHECK-NEXT: [0,80] . . . . . . . . . . . D==========================================eeeeeER . . . . . lw a0, 0(a0)
+# CHECK-NEXT: [0,81] . . . . . . . . . . . D=============================================eER . . . . . or a0, a0, a0
+# CHECK-NEXT: [0,82] . . . . . . . . . . . D==============================================eeeeeER . . . . ld a0, 0(a0)
+# CHECK-NEXT: [0,83] . . . . . . . . . . . D=================================================eER . . . . xor a0, a0, a0
+# CHECK-NEXT: [0,84] . . . . . . . . . . . D==================================================eeeeeER . . . lbu a0, 0(a0)
+# CHECK-NEXT: [0,85] . . . . . . . . . . . .D=====================================================eER . . . addi a0, a0, 1
+# CHECK-NEXT: [0,86] . . . . . . . . . . . .D======================================================eeeeeER . . lhu a0, 0(a0)
+# CHECK-NEXT: [0,87] . . . . . . . . . . . . D=========================================================eER . . sub a0, a0, a0
+# CHECK-NEXT: [0,88] . . . . . . . . . . . . D==========================================================eeeeeER. lwu a0, 0(a0)
+# CHECK-NEXT: [0,89] . . . . . . . . . . . . D=============================================================eER. addw a0, a0, a0
+# CHECK-NEXT: [0,90] . . . . . . . . . . . . D==============================================================eER jr a0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 lui a0, 1
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 auipc a1, 1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 add a0, a0, a1
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 addi a0, a0, 1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 addw a0, a0, a0
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 addiw a0, a0, 1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 sub a0, a0, a0
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 subw a0, a0, a0
+# CHECK-NEXT: 8. 1 7.0 0.0 0.0 and a0, a0, a0
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 andi a0, a0, 1
+# CHECK-NEXT: 10. 1 9.0 0.0 0.0 or a0, a0, a0
+# CHECK-NEXT: 11. 1 10.0 0.0 0.0 ori a0, a0, 1
+# CHECK-NEXT: 12. 1 10.0 0.0 0.0 xor a0, a0, a0
+# CHECK-NEXT: 13. 1 11.0 0.0 0.0 xori a0, a0, 1
+# CHECK-NEXT: 14. 1 12.0 0.0 0.0 sll a0, a0, a0
+# CHECK-NEXT: 15. 1 13.0 0.0 0.0 slli a0, a0, 1
+# CHECK-NEXT: 16. 1 14.0 0.0 0.0 sllw a0, a0, a0
+# CHECK-NEXT: 17. 1 15.0 0.0 0.0 slliw a0, a0, 1
+# CHECK-NEXT: 18. 1 15.0 0.0 0.0 srl a0, a0, a0
+# CHECK-NEXT: 19. 1 16.0 0.0 0.0 srli a0, a0, 1
+# CHECK-NEXT: 20. 1 16.0 0.0 0.0 srlw a0, a0, a0
+# CHECK-NEXT: 21. 1 16.0 0.0 0.0 srliw a0, a0, 1
+# CHECK-NEXT: 22. 1 16.0 0.0 0.0 sra a0, a0, a0
+# CHECK-NEXT: 23. 1 16.0 0.0 0.0 srai a0, a0, 1
+# CHECK-NEXT: 24. 1 16.0 0.0 0.0 sraw a0, a0, a0
+# CHECK-NEXT: 25. 1 16.0 0.0 0.0 sraiw a0, a0, 1
+# CHECK-NEXT: 26. 1 16.0 0.0 0.0 slt a0, a0, a0
+# CHECK-NEXT: 27. 1 16.0 0.0 0.0 slti a0, a0, 1
+# CHECK-NEXT: 28. 1 16.0 0.0 0.0 sltu a0, a0, a0
+# CHECK-NEXT: 29. 1 16.0 0.0 0.0 seqz a0, a0
+# CHECK-NEXT: 30. 1 17.0 0.0 0.0 mul a0, a0, a0
+# CHECK-NEXT: 31. 1 19.0 0.0 0.0 add a0, a0, a0
+# CHECK-NEXT: 32. 1 20.0 0.0 0.0 mulw a0, a0, a0
+# CHECK-NEXT: 33. 1 22.0 0.0 0.0 add a0, a0, a0
+# CHECK-NEXT: 34. 1 23.0 0.0 0.0 beq a0, a0, .Ltmp0
+# CHECK-NEXT: 35. 1 22.0 0.0 0.0 add a0, a0, a0
+# CHECK-NEXT: 36. 1 23.0 0.0 0.0 bne a0, a0, .Ltmp1
+# CHECK-NEXT: 37. 1 22.0 0.0 0.0 add a0, a0, a0
+# CHECK-NEXT: 38. 1 23.0 0.0 0.0 blt a0, a0, .Ltmp2
+# CHECK-NEXT: 39. 1 22.0 0.0 0.0 add a0, a0, a0
+# CHECK-NEXT: 40. 1 23.0 0.0 0.0 bltu a0, a0, .Ltmp3
+# CHECK-NEXT: 41. 1 22.0 0.0 0.0 add a0, a0, a0
+# CHECK-NEXT: 42. 1 23.0 0.0 0.0 bge a0, a0, .Ltmp4
+# CHECK-NEXT: 43. 1 22.0 0.0 0.0 add a0, a0, a0
+# CHECK-NEXT: 44. 1 23.0 0.0 0.0 bgeu a0, a0, .Ltmp5
+# CHECK-NEXT: 45. 1 22.0 0.0 0.0 add.uw a0, a0, a0
+# CHECK-NEXT: 46. 1 22.0 0.0 0.0 slli.uw a0, a0, 1
+# CHECK-NEXT: 47. 1 22.0 0.0 0.0 sh1add.uw a0, a0, a0
+# CHECK-NEXT: 48. 1 22.0 0.0 0.0 sh2add.uw a0, a0, a0
+# CHECK-NEXT: 49. 1 22.0 0.0 0.0 sh3add.uw a0, a0, a0
+# CHECK-NEXT: 50. 1 22.0 0.0 0.0 sh1add a0, a0, a0
+# CHECK-NEXT: 51. 1 22.0 0.0 0.0 sh2add a0, a0, a0
+# CHECK-NEXT: 52. 1 22.0 0.0 0.0 sh3add a0, a0, a0
+# CHECK-NEXT: 53. 1 22.0 0.0 0.0 andn a0, a0, a0
+# CHECK-NEXT: 54. 1 19.0 0.0 0.0 orn a0, a0, a0
+# CHECK-NEXT: 55. 1 16.0 0.0 0.0 xnor a0, a0, a0
+# CHECK-NEXT: 56. 1 16.0 0.0 0.0 sext.b a0, a0
+# CHECK-NEXT: 57. 1 16.0 0.0 0.0 sext.h a0, a0
+# CHECK-NEXT: 58. 1 16.0 0.0 0.0 zext.h a0, a0
+# CHECK-NEXT: 59. 1 16.0 0.0 0.0 min a0, a0, a0
+# CHECK-NEXT: 60. 1 16.0 0.0 0.0 minu a0, a0, a0
+# CHECK-NEXT: 61. 1 16.0 0.0 0.0 max a0, a0, a0
+# CHECK-NEXT: 62. 1 16.0 0.0 0.0 maxu a0, a0, a0
+# CHECK-NEXT: 63. 1 16.0 0.0 0.0 rol a0, a0, a0
+# CHECK-NEXT: 64. 1 16.0 0.0 0.0 ror a0, a0, a0
+# CHECK-NEXT: 65. 1 16.0 0.0 0.0 rori a0, a0, 1
+# CHECK-NEXT: 66. 1 17.0 0.0 0.0 clz a0, a0
+# CHECK-NEXT: 67. 1 20.0 0.0 0.0 clzw a0, a0
+# CHECK-NEXT: 68. 1 23.0 0.0 0.0 ctz a0, a0
+# CHECK-NEXT: 69. 1 26.0 0.0 0.0 ctzw a0, a0
+# CHECK-NEXT: 70. 1 29.0 0.0 0.0 cpop a0, a0
+# CHECK-NEXT: 71. 1 31.0 0.0 0.0 add a0, a0, a0
+# CHECK-NEXT: 72. 1 32.0 0.0 0.0 cpopw a0, a0
+# CHECK-NEXT: 73. 1 34.0 0.0 0.0 add a0, a0, a0
+# CHECK-NEXT: 74. 1 34.0 0.0 0.0 rev8 a0, a0
+# CHECK-NEXT: 75. 1 34.0 0.0 0.0 orc.b a0, a0
+# CHECK-NEXT: 76. 1 35.0 0.0 0.0 lb a0, 0(a0)
+# CHECK-NEXT: 77. 1 38.0 0.0 0.0 add a0, a0, a0
+# CHECK-NEXT: 78. 1 39.0 0.0 0.0 lh a0, 0(a0)
+# CHECK-NEXT: 79. 1 42.0 0.0 0.0 and a0, a0, a0
+# CHECK-NEXT: 80. 1 43.0 0.0 0.0 lw a0, 0(a0)
+# CHECK-NEXT: 81. 1 46.0 0.0 0.0 or a0, a0, a0
+# CHECK-NEXT: 82. 1 47.0 0.0 0.0 ld a0, 0(a0)
+# CHECK-NEXT: 83. 1 50.0 0.0 0.0 xor a0, a0, a0
+# CHECK-NEXT: 84. 1 51.0 0.0 0.0 lbu a0, 0(a0)
+# CHECK-NEXT: 85. 1 54.0 0.0 0.0 addi a0, a0, 1
+# CHECK-NEXT: 86. 1 55.0 0.0 0.0 lhu a0, 0(a0)
+# CHECK-NEXT: 87. 1 58.0 0.0 0.0 sub a0, a0, a0
+# CHECK-NEXT: 88. 1 59.0 0.0 0.0 lwu a0, 0(a0)
+# CHECK-NEXT: 89. 1 62.0 0.0 0.0 addw a0, a0, a0
+# CHECK-NEXT: 90. 1 63.0 0.0 0.0 jr a0
+# CHECK-NEXT: 1 22.7 0.0 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s
new file mode 100644
index 00000000000000..e1925e7647e33c
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan/load-to-alu.s
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-nanhu -timeline -iterations=1 < %s | FileCheck %s
+
+# Test XiangShan load to ALU (4 cycles)
+ld a1, 0(a0)
+addi a2, a1, 1
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 8
+# CHECK-NEXT: Total uOps: 2
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 0.25
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * ld a1, 0(a0)
+# CHECK-NEXT: 1 1 0.25 addi a2, a1, 1
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - XS2ALU
+# CHECK-NEXT: [0.1] - XS2ALU
+# CHECK-NEXT: [0.2] - XS2ALU
+# CHECK-NEXT: [0.3] - XS2ALU
+# CHECK-NEXT: [1.0] - XS2FMAC
+# CHECK-NEXT: [1.1] - XS2FMAC
+# CHECK-NEXT: [1.2] - XS2FMAC
+# CHECK-NEXT: [1.3] - XS2FMAC
+# CHECK-NEXT: [2.0] - XS2FMISC
+# CHECK-NEXT: [2.1] - XS2FMISC
+# CHECK-NEXT: [3.0] - XS2LD
+# CHECK-NEXT: [3.1] - XS2LD
+# CHECK-NEXT: [4.0] - XS2MDU
+# CHECK-NEXT: [4.1] - XS2MDU
+# CHECK-NEXT: [5] - XS2MISC
+# CHECK-NEXT: [6.0] - XS2ST
+# CHECK-NEXT: [6.1] - XS2ST
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [0.3] [1.0] [1.1] [1.2] [1.3] [2.0] [2.1] [3.0] [3.1] [4.0] [4.1] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - ld a1, 0(a0)
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - addi a2, a1, 1
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeeeER ld a1, 0(a0)
+# CHECK-NEXT: [0,1] D====eER addi a2, a1, 1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld a1, 0(a0)
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 addi a2, a1, 1
+# CHECK-NEXT: 1 3.0 0.5 0.0 <total>
>From a57f178f73606558df4dd9ad8e2c9ddff584cd4b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 8 Nov 2023 13:12:43 +0800
Subject: [PATCH 2/6] fixup! [RISCV] Add sched model for XiangShan-NanHu
Add unsupported features.
---
llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index 24b5922a23df14..3b81a696720478 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -24,7 +24,8 @@ def XiangShanNanHuModel : SchedMachineModel {
let MispredictPenalty = 11; // Based on estimate of pipeline depth.
let CompleteModel = 0;
let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
- let UnsupportedFeatures = [];
+ let UnsupportedFeatures = [HasStdExtZcmt, HasStdExtZkr, HasVInstructions,
+ HasVInstructionsI64];
}
let SchedModel = XiangShanNanHuModel in {
>From 53c4c197d4cea50e2ace0878f5fa6a966bb6febb Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 17 Nov 2023 15:08:04 +0800
Subject: [PATCH 3/6] fixup! [RISCV] Add sched model for XiangShan-NanHu
Fix latency of Zbs instructions
---
.../lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index 3b81a696720478..02874fc1096fd0 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -91,6 +91,12 @@ def : WriteRes<WriteBREV8, [XS2ALU]>;
def : WriteRes<WritePACK, [XS2ALU]>;
def : WriteRes<WritePACK32, [XS2ALU]>;
def : WriteRes<WriteZIP, [XS2ALU]>;
+
+// Zbs
+def : WriteRes<WriteSingleBit, [XS2ALU]>;
+def : WriteRes<WriteSingleBitImm, [XS2ALU]>;
+def : WriteRes<WriteBEXT, [XS2ALU]>;
+def : WriteRes<WriteBEXTI, [XS2ALU]>;
}
let Latency = 3 in {
@@ -102,12 +108,6 @@ def : WriteRes<WriteCTZ32, [XS2MDU]>;
def : WriteRes<WriteCPOP, [XS2MDU]>;
def : WriteRes<WriteCPOP32, [XS2MDU]>;
-// Zbs
-def : WriteRes<WriteSingleBit, [XS2MDU]>;
-def : WriteRes<WriteSingleBitImm, [XS2MDU]>;
-def : WriteRes<WriteBEXT, [XS2MDU]>;
-def : WriteRes<WriteBEXTI, [XS2MDU]>;
-
// Zbkc
def : WriteRes<WriteCLMUL, [XS2MDU]>;
@@ -289,8 +289,8 @@ def : XS2LoadToALUBypass<ReadREV8>;
// Zbkc
def : ReadAdvance<ReadCLMUL, 0>;
// Zbs
-def : ReadAdvance<ReadSingleBit, 0>;
-def : ReadAdvance<ReadSingleBitImm, 0>;
+def : XS2LoadToALUBypass<ReadSingleBit>;
+def : XS2LoadToALUBypass<ReadSingleBitImm>;
// Zbkb
def : XS2LoadToALUBypass<ReadBREV8>;
def : XS2LoadToALUBypass<ReadPACK>;
>From 9ae633301fd2d0582854d77f4e60c22eebfacd21 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 9 Feb 2024 11:54:56 +0800
Subject: [PATCH 4/6] [RISCV] Rebase on top of
89f87c387627150d342722b79c78cea2311cddf7
---
llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index 02874fc1096fd0..a473391372e238 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -305,4 +305,5 @@ defm : UnsupportedSchedV;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZfh;
defm : UnsupportedSchedSFB;
+defm : UnsupportedSchedZabha;
}
>From d0d8fda719835e1a09d0236f28ac23f7fb3fe560 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 11 Feb 2024 08:01:09 +0800
Subject: [PATCH 5/6] [RISCV] Tune features for XiangShan-NanHu
---
llvm/lib/Target/RISCV/RISCVProcessors.td | 6 +++++-
llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 1 -
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index ec50668aaa1681..8c75df41f5e395 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -348,4 +348,8 @@ def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
FeatureStdExtZksh,
FeatureStdExtSvinval,
FeatureStdExtZicbom,
- FeatureStdExtZicboz]>;
+ FeatureStdExtZicboz],
+ [TuneNoDefaultUnroll,
+ TuneZExtHFusion,
+ TuneZExtWFusion,
+ TuneShiftedZExtWFusion]>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index a473391372e238..36d2898de8466b 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -23,7 +23,6 @@ def XiangShanNanHuModel : SchedMachineModel {
let LoadLatency = 4;
let MispredictPenalty = 11; // Based on estimate of pipeline depth.
let CompleteModel = 0;
- let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
let UnsupportedFeatures = [HasStdExtZcmt, HasStdExtZkr, HasVInstructions,
HasVInstructionsI64];
}
>From a95ae0bc22c145f25f19142d59b5fbca3fe3ba24 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 11 Feb 2024 15:15:09 +0800
Subject: [PATCH 6/6] [RISCV] Fix header comments.
---
llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index 36d2898de8466b..667b5983cb401c 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -1,12 +1,12 @@
-//==- RISCVSchedXiangShanNanHu.td - XiangShan-NanHu Scheduling Definitions --*- tablegen -*-=//
+//==- RISCVSchedXiangShanNanHu.td - XS-NanHu Scheduling Defs -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
-//===-------------------------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
-//===-------------------------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
// XiangShan is a high-performance open-source RISC-V processor developed by
// the Institute of Computing Technology (ICT), Chinese Academy of Sciences.
More information about the llvm-commits mailing list