[llvm] [RISCV] Add scheduler definitions for XiangShan-KunMingHu (PR #148581)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 2 02:47:46 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Lin Wang (MrLinWang)
<details>
<summary>Changes</summary>
This patch adds an initial scheduler model for the XiangShan KunMingHu microarchitecture in the RISC-V backend, covering scalar instructions only. Support for vector instructions is not included in this patch.
The model includes initial ProcResource definitions and WriteRes mappings for scalar instructions, derived from the publicly available XiangShan KunMingHu documentation (including the [XiangShan Docs](https://docs.xiangshan.cc/projects/design/en/latest/) and [XiangShan User Guide](https://docs.xiangshan.cc/projects/user-guide/en/latest/)) as well as the [XiangShan KunMingHu RTL](https://github.com/OpenXiangShan/XiangShan) source code.
Simulator-based validation results will be provided in a follow-up update. Measurement results from real hardware will also be added afterwards to further validate and refine the model if necessary.
This implementation was contributed by the [XSCC Team](https://github.com/orgs/OpenXiangShan/teams/xscc).
---
Patch is 101.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148581.diff
16 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCV.td (+1)
- (modified) llvm/lib/Target/RISCV/RISCVProcessors.td (+1-1)
- (added) llvm/lib/Target/RISCV/RISCVSchedXiangShanKunMingHu.td (+357)
- (added) llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/bitmanip.s (+179)
- (added) llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/branch.s (+84)
- (added) llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/cascade-fma.s (+55)
- (added) llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/div-fdiv.s (+119)
- (added) llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/fpr-bypass.s (+250)
- (added) llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/gpr-bypass.s (+353)
- (added) llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/load-store.s (+100)
- (added) llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/load-to-alu.s (+75)
- (added) llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/load-to-fpu.s (+75)
- (added) llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/mul-fmul.s (+74)
- (renamed) llvm/test/tools/llvm-mca/RISCV/XiangShan-Nanhu/cascade-fma.s ()
- (renamed) llvm/test/tools/llvm-mca/RISCV/XiangShan-Nanhu/gpr-bypass.s ()
- (renamed) llvm/test/tools/llvm-mca/RISCV/XiangShan-Nanhu/load-to-alu.s ()
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 7a4d8a6fd55d5..d21d3dae20afe 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -67,6 +67,7 @@ include "RISCVSchedSyntacoreSCR345.td"
include "RISCVSchedSyntacoreSCR7.td"
include "RISCVSchedTTAscalonX.td"
include "RISCVSchedXiangShanNanHu.td"
+include "RISCVSchedXiangShanKunMingHu.td"
//===----------------------------------------------------------------------===//
// RISC-V processors supported.
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 44be2a5e9cc51..6b050af6aed9e 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -820,7 +820,7 @@ def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
TuneShiftedZExtWFusion]>;
def XIANGSHAN_KUNMINGHU : RISCVProcessorModel<"xiangshan-kunminghu",
- NoSchedModel,
+ XiangShanKunMingHuModel,
!listconcat(RVA23S64Features,
[FeatureStdExtZacas,
FeatureStdExtZbc,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanKunMingHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanKunMingHu.td
new file mode 100644
index 0000000000000..7602451aedd4a
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanKunMingHu.td
@@ -0,0 +1,357 @@
+//==- RISCVSchedXiangShanKunMingHu.td - XiangShanKunMingHu Scheduling Defs -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// The XiangShan is a high-performance open-source RISC-V processor project
+// initiated by the Institute of Computing Technology (ICT), Chinese Academy of Sciences (CAS).
+// The KunMingHu architecture is its third-generation derivative,
+// developed by the Institute of Computing Technology, Chinese Academy of Sciences
+// and the Beijing Institute of Open Source Chip (BOSC),
+// with a focus on achieving higher performance.
+// Source: https://github.com/OpenXiangShan/XiangShan
+// Documentation: https://docs.xiangshan.cc/projects/design/en/latest/
+// User Guide: https://docs.xiangshan.cc/projects/user-guide/en/latest/
+
+def XiangShanKunMingHuModel : SchedMachineModel {
+ let IssueWidth = 6; // 6-way decode and dispatch
+ let MicroOpBufferSize = 256;
+ let LoopMicroOpBufferSize = 48; // Instruction queue size
+ let LoadLatency = 6;
+ let MispredictPenalty = 13; // Based on estimate of pipeline depth.
+ let CompleteModel = 0;
+ let UnsupportedFeatures = [HasStdExtZcmt, HasStdExtZkr, HasVInstructions,
+ HasVInstructionsI64];
+}
+
+let SchedModel = XiangShanKunMingHuModel in {
+// Define each kind of processor resource and number available.
+/// Pipeline
+let BufferSize = 24 in {
+ // Integer
+ def XSPipeALU0 : ProcResource<1>; // ALU, MUL, BKU
+ def XSPipeALU1 : ProcResource<1>; // ALU, MUL, BKU
+ def XSPipeALU2 : ProcResource<1>; // ALU
+ def XSPipeALU3 : ProcResource<1>; // ALU
+
+ def XSPipeBJU0 : ProcResource<1>; // BRU, JMP
+ def XSPipeBJU1 : ProcResource<1>; // BRU, JMP
+ def XSPipeBJU2 : ProcResource<1>; // BRU, JMP, I2F, I2V, VSet
+ def XSPipeBJU3 : ProcResource<1>; // CSR, Fence, DIV
+}
+
+let BufferSize = 18 in {
+ // Floating-point
+ def XSPipeFEX0 : ProcResource<1>; // FALU, FMA, FCVT, F2V
+ def XSPipeFEX1 : ProcResource<1>; // FDIV
+ def XSPipeFEX2 : ProcResource<1>; // FALU, FMA
+ def XSPipeFEX3 : ProcResource<1>; // FDIV
+ def XSPipeFEX4 : ProcResource<1>; // FALU, FMA
+}
+
+let BufferSize = 16 in {
+ // Load and store
+ def XSPipeLDU : ProcResource<3>; // LDU
+
+ def XSPipeSTA : ProcResource<2>; // STA
+ def XSPipeSTD : ProcResource<2>; // STD
+}
+
+def XSPipeGroupALU : ProcResGroup<[XSPipeALU0, XSPipeALU1, XSPipeALU2, XSPipeALU3]>;
+def XSPipeGroupMUL : ProcResGroup<[XSPipeALU0, XSPipeALU1]>;
+def XSPipeGroupBKU : ProcResGroup<[XSPipeALU0, XSPipeALU1]>;
+def XSPipeGroupBRU : ProcResGroup<[XSPipeBJU0, XSPipeBJU1, XSPipeBJU2]>;
+
+def XSPipeGroupFALU : ProcResGroup<[XSPipeFEX0, XSPipeFEX2, XSPipeFEX4]>;
+def XSPipeGroupFMA : ProcResGroup<[XSPipeFEX0, XSPipeFEX2, XSPipeFEX4]>;
+def XSPipeGroupFDIV : ProcResGroup<[XSPipeFEX1, XSPipeFEX3]>;
+
+def XSPipeGroupSTU : ProcResGroup<[XSPipeSTA, XSPipeSTD]>;
+
+//===----------------------------------------------------------------------===//
+
+
+// Jump
+def : WriteRes<WriteJmp, [XSPipeGroupBRU]>;
+def : WriteRes<WriteJal, [XSPipeGroupBRU]>;
+def : WriteRes<WriteJalr, [XSPipeGroupBRU]>;
+
+// Integer arithmetic and logic
+def : WriteRes<WriteIALU32, [XSPipeGroupALU]>;
+def : WriteRes<WriteIALU, [XSPipeGroupALU]>;
+def : WriteRes<WriteShiftImm32, [XSPipeGroupALU]>;
+def : WriteRes<WriteShiftImm, [XSPipeGroupALU]>;
+def : WriteRes<WriteShiftReg32, [XSPipeGroupALU]>;
+def : WriteRes<WriteShiftReg, [XSPipeGroupALU]>;
+
+// Integer multiplication
+let Latency = 2 in {
+ def : WriteRes<WriteIMul, [XSPipeGroupMUL]>;
+ def : WriteRes<WriteIMul32, [XSPipeGroupMUL]>;
+}
+
+// Integer division
+// Worst-case latency is used.
+// The latency of integer division ranges from 4 to 20.
+let Latency = 20, ReleaseAtCycles = [20] in {
+ def : WriteRes<WriteIDiv32, [XSPipeBJU3]>;
+ def : WriteRes<WriteIDiv, [XSPipeBJU3]>;
+ def : WriteRes<WriteIRem32, [XSPipeBJU3]>;
+ def : WriteRes<WriteIRem, [XSPipeBJU3]>;
+}
+
+// Memory
+let Latency = 4 in {
+ def : WriteRes<WriteSTB, [XSPipeGroupSTU]>;
+ def : WriteRes<WriteSTH, [XSPipeGroupSTU]>;
+ def : WriteRes<WriteSTW, [XSPipeGroupSTU]>;
+ def : WriteRes<WriteSTD, [XSPipeGroupSTU]>;
+ def : WriteRes<WriteFST32, [XSPipeGroupSTU]>;
+ def : WriteRes<WriteFST64, [XSPipeGroupSTU]>;
+ def : WriteRes<WriteAtomicSTW, [XSPipeGroupSTU]>;
+ def : WriteRes<WriteAtomicSTD, [XSPipeGroupSTU]>;
+}
+
+let Latency = 6 in {
+ def : WriteRes<WriteLDB, [XSPipeLDU]>;
+ def : WriteRes<WriteLDH, [XSPipeLDU]>;
+ def : WriteRes<WriteLDW, [XSPipeLDU]>;
+ def : WriteRes<WriteLDD, [XSPipeLDU]>;
+
+ def : WriteRes<WriteFLD32, [XSPipeLDU]>;
+ def : WriteRes<WriteFLD64, [XSPipeLDU]>;
+
+ def : WriteRes<WriteAtomicW, [XSPipeLDU]>;
+ def : WriteRes<WriteAtomicD, [XSPipeLDU]>;
+ def : WriteRes<WriteAtomicLDW, [XSPipeLDU]>;
+ def : WriteRes<WriteAtomicLDD, [XSPipeLDU]>;
+}
+
+let Latency = 1 in {
+ def : WriteRes<WriteFAdd32, [XSPipeGroupFALU]>;
+ def : WriteRes<WriteFAdd64, [XSPipeGroupFALU]>;
+}
+
+let Latency = 2 in {
+ def : WriteRes<WriteFCmp32, [XSPipeGroupFALU]>;
+ def : WriteRes<WriteFCmp64, [XSPipeGroupFALU]>;
+ def : WriteRes<WriteFMinMax32, [XSPipeGroupFALU]>;
+ def : WriteRes<WriteFMinMax64, [XSPipeGroupFALU]>;
+ def : WriteRes<WriteFClass32, [XSPipeGroupFALU]>;
+ def : WriteRes<WriteFClass64, [XSPipeGroupFALU]>;
+ def : WriteRes<WriteFSGNJ32, [XSPipeGroupFALU]>;
+ def : WriteRes<WriteFSGNJ64, [XSPipeGroupFALU]>;
+}
+
+let Latency = 3 in {
+ def : WriteRes<WriteFMul32, [XSPipeGroupFMA]>;
+ def : WriteRes<WriteFMul64, [XSPipeGroupFMA]>;
+ def : WriteRes<WriteFMA32, [XSPipeGroupFMA]>;
+ def : WriteRes<WriteFMA64, [XSPipeGroupFMA]>;
+}
+
+// FDIV
+let Latency = 10 in {
+ def : WriteRes<WriteFDiv32, [XSPipeGroupFDIV]>;
+ def : WriteRes<WriteFSqrt32, [XSPipeGroupFDIV]>;
+}
+let Latency = 15 in {
+ def : WriteRes<WriteFDiv64, [XSPipeGroupFDIV]>;
+ def : WriteRes<WriteFSqrt64, [XSPipeGroupFDIV]>;
+}
+
+// FCVT
+let Latency = 2 in {
+ def : WriteRes<WriteFCvtF32ToI32, [XSPipeFEX0]>;
+ def : WriteRes<WriteFCvtF32ToI64, [XSPipeFEX0]>;
+ def : WriteRes<WriteFCvtF64ToI32, [XSPipeFEX0]>;
+ def : WriteRes<WriteFCvtF64ToI64, [XSPipeFEX0]>;
+ def : WriteRes<WriteFCvtF64ToF32, [XSPipeFEX0]>;
+ def : WriteRes<WriteFCvtF32ToF64, [XSPipeFEX0]>;
+ def : WriteRes<WriteFMovF64ToI64, [XSPipeFEX0]>;
+ def : WriteRes<WriteFMovF32ToI32, [XSPipeFEX0]>;
+}
+
+// I2V
+let Latency = 0 in {
+ def : WriteRes<WriteFMovI64ToF64, [XSPipeBJU2]>;
+ def : WriteRes<WriteFMovI32ToF32, [XSPipeBJU2]>;
+}
+
+// I2F
+let Latency = 2 in {
+ def : WriteRes<WriteFCvtI32ToF32, [XSPipeBJU2]>;
+ def : WriteRes<WriteFCvtI64ToF32, [XSPipeBJU2]>;
+ def : WriteRes<WriteFCvtI32ToF64, [XSPipeBJU2]>;
+ def : WriteRes<WriteFCvtI64ToF64, [XSPipeBJU2]>;
+}
+
+/// Zb*
+let Latency = 1 in {
+ // Zba
+ def : WriteRes<WriteSHXADD, [XSPipeGroupALU]>;
+ def : WriteRes<WriteSHXADD32, [XSPipeGroupALU]>;
+
+ // Zbb
+ def : WriteRes<WriteRotateImm, [XSPipeGroupALU]>;
+ def : WriteRes<WriteRotateImm32, [XSPipeGroupALU]>;
+ def : WriteRes<WriteRotateReg, [XSPipeGroupALU]>;
+ def : WriteRes<WriteRotateReg32, [XSPipeGroupALU]>;
+ def : WriteRes<WriteREV8, [XSPipeGroupALU]>;
+ def : WriteRes<WriteORCB, [XSPipeGroupALU]>;
+ def : WriteRes<WriteIMinMax, [XSPipeGroupALU]>;
+
+ // Zbs
+ def : WriteRes<WriteSingleBit, [XSPipeGroupALU]>;
+ def : WriteRes<WriteSingleBitImm, [XSPipeGroupALU]>;
+ def : WriteRes<WriteBEXT, [XSPipeGroupALU]>;
+ def : WriteRes<WriteBEXTI, [XSPipeGroupALU]>;
+
+ // Zbkb
+ def : WriteRes<WriteBREV8, [XSPipeGroupALU]>;
+ def : WriteRes<WritePACK, [XSPipeGroupALU]>;
+ def : WriteRes<WritePACK32, [XSPipeGroupALU]>;
+ def : WriteRes<WriteZIP, [XSPipeGroupALU]>;
+}
+
+let Latency = 2 in {
+ // Zbb
+ def : WriteRes<WriteCLZ, [XSPipeGroupBKU]>;
+ def : WriteRes<WriteCLZ32, [XSPipeGroupBKU]>;
+ def : WriteRes<WriteCTZ, [XSPipeGroupBKU]>;
+ def : WriteRes<WriteCTZ32, [XSPipeGroupBKU]>;
+ def : WriteRes<WriteCPOP, [XSPipeGroupBKU]>;
+ def : WriteRes<WriteCPOP32, [XSPipeGroupBKU]>;
+
+ // Zbc
+ def : WriteRes<WriteCLMUL, [XSPipeGroupBKU]>;
+
+ // Zbkx
+ def : WriteRes<WriteXPERM, [XSPipeGroupBKU]>;
+}
+
+// Others
+def : WriteRes<WriteCSR, [XSPipeBJU3]>;
+def : WriteRes<WriteNop, []>;
+
+def : InstRW<[WriteIALU], (instrs COPY)>;
+
+//===----------------------------------------------------------------------===//
+
+// Bypass and advance
+
+class XSLDUtoAnyBypass<SchedRead read, int cycles = 2>
+ : ReadAdvance<read, cycles, [WriteLDB, WriteLDH,
+ WriteLDW, WriteLDD,
+ WriteFLD32, WriteFLD64,
+ WriteAtomicW, WriteAtomicD,
+ WriteAtomicLDW, WriteAtomicLDD]>;
+
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : XSLDUtoAnyBypass<ReadIALU>;
+def : XSLDUtoAnyBypass<ReadIALU32>;
+def : XSLDUtoAnyBypass<ReadShiftImm>;
+def : XSLDUtoAnyBypass<ReadShiftImm32>;
+def : XSLDUtoAnyBypass<ReadShiftReg>;
+def : XSLDUtoAnyBypass<ReadShiftReg32>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIRem, 0>;
+def : ReadAdvance<ReadIRem32, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : XSLDUtoAnyBypass<ReadAtomicWA>;
+def : XSLDUtoAnyBypass<ReadAtomicWD>;
+def : XSLDUtoAnyBypass<ReadAtomicDA>;
+def : XSLDUtoAnyBypass<ReadAtomicDD>;
+def : XSLDUtoAnyBypass<ReadAtomicLDW>;
+def : XSLDUtoAnyBypass<ReadAtomicLDD>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFStoreData, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
+def : ReadAdvance<ReadFAdd32, 0>;
+def : ReadAdvance<ReadFAdd64, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
+def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+
+/// B extension
+// Zba
+def : ReadAdvance<ReadSHXADD, 0>;
+def : ReadAdvance<ReadSHXADD32, 0>;
+// Zbb
+def : ReadAdvance<ReadRotateImm, 0>;
+def : ReadAdvance<ReadRotateImm32, 0>;
+def : ReadAdvance<ReadRotateReg, 0>;
+def : ReadAdvance<ReadRotateReg32, 0>;
+def : ReadAdvance<ReadCLZ, 0>;
+def : ReadAdvance<ReadCLZ32, 0>;
+def : ReadAdvance<ReadCTZ, 0>;
+def : ReadAdvance<ReadCTZ32, 0>;
+def : ReadAdvance<ReadCPOP, 0>;
+def : ReadAdvance<ReadCPOP32, 0>;
+def : ReadAdvance<ReadREV8, 0>;
+def : ReadAdvance<ReadORCB, 0>;
+def : ReadAdvance<ReadIMinMax, 0>;
+// Zbc
+def : ReadAdvance<ReadCLMUL, 0>;
+// Zbs
+def : ReadAdvance<ReadSingleBit, 0>;
+def : ReadAdvance<ReadSingleBitImm, 0>;
+// Zbkb
+def : ReadAdvance<ReadBREV8, 0>;
+def : ReadAdvance<ReadPACK, 0>;
+def : ReadAdvance<ReadPACK32, 0>;
+def : ReadAdvance<ReadZIP, 0>;
+// Zbkx
+def : ReadAdvance<ReadXPERM, 0>;
+
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedQ;
+defm : UnsupportedSchedV;
+defm : UnsupportedSchedZfa;
+defm : UnsupportedSchedZfhmin;
+defm : UnsupportedSchedSFB;
+defm : UnsupportedSchedZabha;
+defm : UnsupportedSchedXsf;
+defm : UnsupportedSchedZvk;
+
+} // SchedModel
diff --git a/llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/bitmanip.s b/llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/bitmanip.s
new file mode 100644
index 0000000000000..924fc43a59df1
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/XiangShan-Kunminghu/bitmanip.s
@@ -0,0 +1,179 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=xiangshan-kunminghu -mattr=+zba,+zbb,+zbs -iterations=1 < %s | FileCheck %s
+
+sh1add a0, a1, a2
+sh2add a0, a1, a2
+sh3add a0, a1, a2
+sh1add.uw a0, a1, a2
+sh2add.uw a0, a1, a2
+sh3add.uw a0, a1, a2
+add.uw a0, a1, a2
+slli.uw a0, a1, 1
+
+clz a0, a1
+clzw a0, a1
+ctz a0, a1
+ctzw a0, a1
+cpop a0, a1
+cpopw a0, a1
+
+andn a0, a1, a2
+orn a0, a1, a2
+xnor a0, a1, a2
+
+rol a0, a1, a2
+rolw a0, a1, a2
+ror a0, a1, a2
+rorw a0, a1, a2
+rori a0, a1, 1
+roriw a0, a1, 1
+
+min a0, a1, a2
+minu a0, a1, a2
+max a0, a1, a2
+maxu a0, a1, a2
+
+sext.b a0, a1
+sext.h a0, a1
+zext.h a0, a1
+
+rev8 a0, a1
+orc.b a0, a1
+
+bclr a0, a1, a2
+bclri a0, a1, 1
+bext a0, a1, a2
+bexti a0, a1, 1
+binv a0, a1, a2
+binvi a0, a1, 1
+bset a0, a1, a2
+bseti a0, a1, 1
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 40
+# CHECK-NEXT: Total Cycles: 13
+# CHECK-NEXT: Total uOps: 40
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 3.08
+# CHECK-NEXT: IPC: 3.08
+# CHECK-NEXT: Block RThroughput: 8.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.25 sh1add a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 sh2add a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 sh3add a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 sh1add.uw a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 sh2add.uw a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 sh3add.uw a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 add.uw a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 slli.uw a0, a1, 1
+# CHECK-NEXT: 1 2 0.50 clz a0, a1
+# CHECK-NEXT: 1 2 0.50 clzw a0, a1
+# CHECK-NEXT: 1 2 0.50 ctz a0, a1
+# CHECK-NEXT: 1 2 0.50 ctzw a0, a1
+# CHECK-NEXT: 1 2 0.50 cpop a0, a1
+# CHECK-NEXT: 1 2 0.50 cpopw a0, a1
+# CHECK-NEXT: 1 1 0.25 andn a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 orn a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 xnor a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 rol a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 rolw a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 ror a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 rorw a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 rori a0, a1, 1
+# CHECK-NEXT: 1 1 0.25 roriw a0, a1, 1
+# CHECK-NEXT: 1 1 0.25 min a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 minu a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 max a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 maxu a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 sext.b a0, a1
+# CHECK-NEXT: 1 1 0.25 sext.h a0, a1
+# CHECK-NEXT: 1 1 0.25 zext.h a0, a1
+# CHECK-NEXT: 1 1 0.25 rev8 a0, a1
+# CHECK-NEXT: 1 1 0.25 orc.b a0, a1
+# CHECK-NEXT: 1 1 0.25 bclr a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 bclri a0, a1, 1
+# CHECK-NEXT: 1 1 0.25 bext a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 bexti a0, a1, 1
+# CHECK-NEXT: 1 1 0.25 binv a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 binvi a0, a1, 1
+# CHECK-NEXT: 1 1 0.25 bset a0, a1, a2
+# CHECK-NEXT: 1 1 0.25 bseti a0, a1, 1
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - XSPipeALU0
+# CHECK-NEXT: [1] - XSPipeALU1
+# CHECK-NEXT: [2] - XSPipeALU2
+# CHECK-NEXT: [3] - XSPipeALU3
+# CHECK-NEXT: [4] - XSPipeBJU0
+# CHECK-NEXT: [5] - XSPipeBJU1
+# CHECK-NEXT: [6] - XSPipeBJU2
+# CHECK-NEXT: [7] - XSPipeBJU3
+# CHECK-NEXT: [8] - XSPipeFEX0
+# CHECK-NEXT: [9] - XSPipeFEX1
+# CHECK-NEXT: [10] - XSPipeFEX2
+# CHECK-NEXT: [11] - XSPipeFEX3
+# CHECK-NEXT: [12] - XSPipeFEX4
+# CHECK-NEXT: [13.0] - XSPipeLDU
+# CHECK-NEXT: [13.1] - XSPipeLDU
+# CHECK-NEXT: [13.2] - XSPipeLDU
+# CHECK-NEXT: [14.0] - XSPipeSTA
+# CHECK-NEXT: [14.1] - XSPipeSTA
+# CHECK-NEXT: [15.0] - XSPipeSTD
+# CHECK-NEXT: [15.1] - XSPipeSTD
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13.0] [13.1] [13.2] [14.0] [14.1] [15.0] [15.1]
+# CHECK-NEXT: 10.00 10.00 10.00 10.00 - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13.0] [13.1] [13.2] [14.0] [14.1] [15.0] [15.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - - - - - sh1add a0, a1, a2
+# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - - - - sh2add a0, a1, a2
+# CHECK-NEXT: - 1.00 - - - - - - - ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/148581
More information about the llvm-commits
mailing list