[llvm] [RISCV] Add scheduler definitions for SpacemiT-X60 (PR #137343)
Mikhail R. Gadelha via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 25 07:53:28 PDT 2025
https://github.com/mikhailramalho created https://github.com/llvm/llvm-project/pull/137343
This patch adds an initial scheduler model for the SpacemiT-X60, including latency for scalar instructions only.
The scheduler is based on the documented characteristics of the C908, which the SpacemiT-X60 is believed to be based on, and provides the expected latency for several instructions. I ran llvm-exegesis to confirm most of these values and to get the latency of instructions not provided by the C908 documentation (e.g., double floating-point instructions).
For load and store instructions, the C908 documentation says the latency is >= 3 for loads and 1 for stores. I tried a few combinations of values until I got the current values of 5 (loads) and 3 (stores), which yield the best results.
Although the X60 does appear to support multiple issue for at least some floating-point instructions, this model assumes single issue for floating point (a single FP resource), as increasing it reduces the gains reported below.
This patch gives a geomean improvement of ~4% on SPEC CPU 2017 for both rva22u64 and rva22u64_v, with some benchmarks improving up to 15% (525.x264_r, 508.namd_r). There were no execution time regressions detected.
* rva22u64: https://lnt.lukelau.me/db_default/v4/nts/471?compare_to=405 (compares a commit functionally identical to 66afbfd to the baseline 8286b804)
* rva22u64_v: https://lnt.lukelau.me/db_default/v4/nts/474?compare_to=404 (compares a commit functionally identical to 66afbfd to the baseline 8286b804)
This initial scheduling model is focused on providing enough definitions to improve performance on the SpacemiT-X60. Further incremental gains may be possible through a much more detailed microarchitectural analysis, but that is left to future work.
Further scheduling definitions for RVV can be added in a future PR.
>From 66afbfd0893967cabf3d9fa701ffbcc997a151e2 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Thu, 10 Apr 2025 11:12:52 -0300
Subject: [PATCH] [RISCV] Add scheduler for x60
---
llvm/lib/Target/RISCV/RISCV.td | 1 +
llvm/lib/Target/RISCV/RISCVProcessors.td | 2 +-
.../lib/Target/RISCV/RISCVSchedSpacemitX60.td | 332 ++++++++++++++
.../RISCV/rvv/vxrm-insert-out-of-loop.ll | 50 +--
.../tools/llvm-mca/RISCV/SpacemitX60/atomic.s | 312 +++++++++++++
.../RISCV/SpacemitX60/floating-point.s | 334 ++++++++++++++
.../llvm-mca/RISCV/SpacemitX60/integer.s | 420 ++++++++++++++++++
7 files changed, 1425 insertions(+), 26 deletions(-)
create mode 100644 llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
create mode 100644 llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s
create mode 100644 llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s
create mode 100644 llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 2c2271e486a84..6a6cec88b74a4 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -57,6 +57,7 @@ include "RISCVSchedSyntacoreSCR345.td"
include "RISCVSchedSyntacoreSCR7.td"
include "RISCVSchedTTAscalonD8.td"
include "RISCVSchedXiangShanNanHu.td"
+include "RISCVSchedSpacemitX60.td"
//===----------------------------------------------------------------------===//
// RISC-V processors supported.
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 9d48adeec5e86..6e44518cb43f2 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -559,7 +559,7 @@ def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
TuneShiftedZExtWFusion]>;
def SPACEMIT_X60 : RISCVProcessorModel<"spacemit-x60",
- NoSchedModel,
+ SpacemitX60Model,
!listconcat(RVA22S64Features,
[FeatureStdExtV,
FeatureStdExtSscofpmf,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
new file mode 100644
index 0000000000000..d1148cc2f69dc
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -0,0 +1,332 @@
+//=- RISCVSchedSpacemitX60.td - Spacemit X60 Scheduling Defs -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Scheduler model for the SpacemiT-X60 processor based on documentation of the
+// C908 and experiments on real hardware (bpi-f3).
+//
+//===----------------------------------------------------------------------===//
+
+def SpacemitX60Model : SchedMachineModel {
+ let IssueWidth = 2; // dual-issue
+ let MicroOpBufferSize = 0; // in-order
+ let LoadLatency = 5; // worst case (C908 manual documents >= 3)
+ let MispredictPenalty = 9; // nine-stage
+
+ let CompleteModel = 0; // partial model: only scalar instructions are covered so far
+
+ let UnsupportedFeatures = [HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
+ HasStdExtZksed, HasStdExtZksh, HasStdExtZkr];
+}
+
+let SchedModel = SpacemitX60Model in {
+
+//===----------------------------------------------------------------------===//
+// Define processor resources for Spacemit-X60
+
+// Information gathered from the C908 user manual:
+let BufferSize = 0 in {
+ // The LSU supports dual issue for scalar store/load instructions
+ def SMX60_LS : ProcResource<2>;
+
+ // An IEU can decode and issue two instructions at the same time
+ def SMX60_IEU : ProcResource<2>;
+
+ def SMX60_FP : ProcResource<1>; // floating point is modeled as a single unit
+}
+
+//===----------------------------------------------------------------------===//
+
+// Branching
+def : WriteRes<WriteJmp, [SMX60_IEU]>;
+def : WriteRes<WriteJal, [SMX60_IEU]>;
+def : WriteRes<WriteJalr, [SMX60_IEU]>;
+
+// Integer arithmetic and logic
+def : WriteRes<WriteIALU32, [SMX60_IEU]>;
+def : WriteRes<WriteIALU, [SMX60_IEU]>;
+def : WriteRes<WriteShiftImm32, [SMX60_IEU]>;
+def : WriteRes<WriteShiftImm, [SMX60_IEU]>;
+def : WriteRes<WriteShiftReg32, [SMX60_IEU]>;
+def : WriteRes<WriteShiftReg, [SMX60_IEU]>;
+
+// Integer multiplication
+let Latency = 4 in {
+ def : WriteRes<WriteIMul, [SMX60_IEU]>;
+ def : WriteRes<WriteIMul32, [SMX60_IEU]>;
+}
+
+// Integer division/remainder
+// Worst case latency is used.
+def : WriteRes<WriteIDiv32, [SMX60_IEU]> { let Latency = 12; }
+def : WriteRes<WriteIDiv, [SMX60_IEU]> { let Latency = 20; }
+def : WriteRes<WriteIRem32, [SMX60_IEU]> { let Latency = 12; }
+def : WriteRes<WriteIRem, [SMX60_IEU]> { let Latency = 20; }
+
+// Bitmanip
+def : WriteRes<WriteRotateImm, [SMX60_IEU]>;
+def : WriteRes<WriteRotateImm32, [SMX60_IEU]>;
+def : WriteRes<WriteRotateReg, [SMX60_IEU]>;
+def : WriteRes<WriteRotateReg32, [SMX60_IEU]>;
+
+def : WriteRes<WriteCLZ, [SMX60_IEU]>;
+def : WriteRes<WriteCLZ32, [SMX60_IEU]>;
+def : WriteRes<WriteCTZ, [SMX60_IEU]>;
+def : WriteRes<WriteCTZ32, [SMX60_IEU]>;
+
+def : WriteRes<WriteCPOP, [SMX60_IEU]>;
+def : WriteRes<WriteCPOP32, [SMX60_IEU]>;
+
+def : WriteRes<WriteORCB, [SMX60_IEU]>;
+
+def : WriteRes<WriteIMinMax, [SMX60_IEU]>;
+
+def : WriteRes<WriteREV8, [SMX60_IEU]>;
+
+def : WriteRes<WriteSHXADD, [SMX60_IEU]>;
+def : WriteRes<WriteSHXADD32, [SMX60_IEU]>;
+
+// Single-bit instructions
+def : WriteRes<WriteSingleBit, [SMX60_IEU]>;
+def : WriteRes<WriteSingleBitImm, [SMX60_IEU]>;
+def : WriteRes<WriteBEXT, [SMX60_IEU]>;
+def : WriteRes<WriteBEXTI, [SMX60_IEU]>;
+
+// Memory/Atomic memory
+let Latency = 3 in {
+ def : WriteRes<WriteSTB, [SMX60_LS]>;
+ def : WriteRes<WriteSTH, [SMX60_LS]>;
+ def : WriteRes<WriteSTW, [SMX60_LS]>;
+ def : WriteRes<WriteSTD, [SMX60_LS]>;
+ def : WriteRes<WriteFST16, [SMX60_LS]>;
+ def : WriteRes<WriteFST32, [SMX60_LS]>;
+ def : WriteRes<WriteFST64, [SMX60_LS]>;
+ def : WriteRes<WriteAtomicSTW, [SMX60_LS]>;
+ def : WriteRes<WriteAtomicSTD, [SMX60_LS]>;
+}
+
+let Latency = 5 in {
+ def : WriteRes<WriteLDB, [SMX60_LS]>;
+ def : WriteRes<WriteLDH, [SMX60_LS]>;
+ def : WriteRes<WriteLDW, [SMX60_LS]>;
+ def : WriteRes<WriteLDD, [SMX60_LS]>;
+ def : WriteRes<WriteFLD16, [SMX60_LS]>;
+ def : WriteRes<WriteFLD32, [SMX60_LS]>;
+ def : WriteRes<WriteFLD64, [SMX60_LS]>;
+}
+
+// Atomics
+let Latency = 5 in {
+ def : WriteRes<WriteAtomicLDW, [SMX60_LS]>;
+ def : WriteRes<WriteAtomicLDD, [SMX60_LS]>;
+ def : WriteRes<WriteAtomicW, [SMX60_LS]>;
+ def : WriteRes<WriteAtomicD, [SMX60_LS]>;
+}
+
+// Floating point units: half precision
+def : WriteRes<WriteFAdd16, [SMX60_FP]> { let Latency = 3; }
+def : WriteRes<WriteFMul16, [SMX60_FP]> { let Latency = 3; }
+def : WriteRes<WriteFMA16, [SMX60_FP]> { let Latency = 4; }
+def : WriteRes<WriteFSGNJ16, [SMX60_FP]> { let Latency = 3; }
+def : WriteRes<WriteFMinMax16, [SMX60_FP]> { let Latency = 3; }
+
+// Worst case latency is used
+let Latency = 7, ReleaseAtCycles = [7] in {
+ def : WriteRes<WriteFDiv16, [SMX60_FP]>;
+ def : WriteRes<WriteFSqrt16, [SMX60_FP]>;
+}
+
+// Single precision
+def : WriteRes<WriteFAdd32, [SMX60_FP]> { let Latency = 3; }
+def : WriteRes<WriteFMul32, [SMX60_FP]> { let Latency = 4; }
+def : WriteRes<WriteFMA32, [SMX60_FP]> { let Latency = 5; }
+def : WriteRes<WriteFSGNJ32, [SMX60_FP]> { let Latency = 3; }
+def : WriteRes<WriteFMinMax32, [SMX60_FP]> { let Latency = 3; }
+
+// Worst case latency is used
+let Latency = 10, ReleaseAtCycles = [10] in {
+ def : WriteRes<WriteFDiv32, [SMX60_FP]>;
+ def : WriteRes<WriteFSqrt32, [SMX60_FP]>;
+}
+
+// Double precision
+def : WriteRes<WriteFAdd64, [SMX60_FP]> { let Latency = 4; }
+def : WriteRes<WriteFMul64, [SMX60_FP]> { let Latency = 4; }
+def : WriteRes<WriteFMA64, [SMX60_FP]> { let Latency = 5; }
+def : WriteRes<WriteFSGNJ64, [SMX60_FP]> { let Latency = 3; }
+def : WriteRes<WriteFMinMax64, [SMX60_FP]> { let Latency = 3; }
+
+let Latency = 10, ReleaseAtCycles = [10] in { // Worst case latency is used
+ def : WriteRes<WriteFDiv64, [SMX60_FP]>;
+ def : WriteRes<WriteFSqrt64, [SMX60_FP]>;
+}
+
+// Conversions
+let Latency = 3 in {
+ def : WriteRes<WriteFCvtI32ToF16, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtI32ToF32, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtI32ToF64, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtI64ToF16, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtI64ToF32, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtI64ToF64, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtF16ToI32, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtF16ToI64, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtF16ToF32, [SMX60_FP]>;
+ def : WriteRes<WriteFCvtF16ToF64, [SMX60_FP]>;
+ def : WriteRes<WriteFCvtF32ToI32, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtF32ToI64, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtF32ToF16, [SMX60_FP]>;
+ def : WriteRes<WriteFCvtF32ToF64, [SMX60_FP]>;
+ def : WriteRes<WriteFCvtF64ToI32, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtF64ToI64, [SMX60_IEU]>;
+ def : WriteRes<WriteFCvtF64ToF16, [SMX60_FP]>;
+ def : WriteRes<WriteFCvtF64ToF32, [SMX60_FP]>;
+}
+
+let Latency = 2 in {
+ def : WriteRes<WriteFClass16, [SMX60_FP]>;
+ def : WriteRes<WriteFClass32, [SMX60_FP]>;
+ def : WriteRes<WriteFClass64, [SMX60_FP]>;
+}
+
+let Latency = 4 in {
+ def : WriteRes<WriteFCmp16, [SMX60_FP]>;
+ def : WriteRes<WriteFCmp32, [SMX60_FP]>;
+ def : WriteRes<WriteFCmp64, [SMX60_FP]>;
+}
+
+let Latency = 2 in {
+ def : WriteRes<WriteFMovI16ToF16, [SMX60_IEU]>;
+ def : WriteRes<WriteFMovF16ToI16, [SMX60_IEU]>;
+ def : WriteRes<WriteFMovI32ToF32, [SMX60_IEU]>;
+ def : WriteRes<WriteFMovF32ToI32, [SMX60_IEU]>;
+ def : WriteRes<WriteFMovI64ToF64, [SMX60_IEU]>;
+ def : WriteRes<WriteFMovF64ToI64, [SMX60_IEU]>;
+}
+
+// Others
+def : WriteRes<WriteCSR, [SMX60_IEU]>;
+def : WriteRes<WriteNop, [SMX60_IEU]>;
+
+//===----------------------------------------------------------------------===//
+// Bypass and advance
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : ReadAdvance<ReadIALU, 0>;
+def : ReadAdvance<ReadIALU32, 0>;
+def : ReadAdvance<ReadShiftImm, 0>;
+def : ReadAdvance<ReadShiftImm32, 0>;
+def : ReadAdvance<ReadShiftReg, 0>;
+def : ReadAdvance<ReadShiftReg32, 0>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIRem, 0>;
+def : ReadAdvance<ReadIRem32, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFStoreData, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
+def : ReadAdvance<ReadFAdd16, 0>;
+def : ReadAdvance<ReadFAdd32, 0>;
+def : ReadAdvance<ReadFAdd64, 0>;
+def : ReadAdvance<ReadFMul16, 0>;
+def : ReadAdvance<ReadFMA16, 0>;
+def : ReadAdvance<ReadFMA16Addend, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
+def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
+def : ReadAdvance<ReadFDiv16, 0>;
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt16, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp16, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFSGNJ16, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMinMax16, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
+def : ReadAdvance<ReadFCvtF16ToI32, 0>;
+def : ReadAdvance<ReadFCvtF16ToI64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF16, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF16, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFCvtF16ToF32, 0>;
+def : ReadAdvance<ReadFCvtF32ToF16, 0>;
+def : ReadAdvance<ReadFCvtF16ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF16, 0>;
+def : ReadAdvance<ReadFMovF16ToI16, 0>;
+def : ReadAdvance<ReadFMovI16ToF16, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass16, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+
+// Bitmanip
+def : ReadAdvance<ReadRotateImm, 0>;
+def : ReadAdvance<ReadRotateImm32, 0>;
+def : ReadAdvance<ReadRotateReg, 0>;
+def : ReadAdvance<ReadRotateReg32, 0>;
+def : ReadAdvance<ReadCLZ, 0>;
+def : ReadAdvance<ReadCLZ32, 0>;
+def : ReadAdvance<ReadCTZ, 0>;
+def : ReadAdvance<ReadCTZ32, 0>;
+def : ReadAdvance<ReadCPOP, 0>;
+def : ReadAdvance<ReadCPOP32, 0>;
+def : ReadAdvance<ReadORCB, 0>;
+def : ReadAdvance<ReadIMinMax, 0>;
+def : ReadAdvance<ReadREV8, 0>;
+def : ReadAdvance<ReadSHXADD, 0>;
+def : ReadAdvance<ReadSHXADD32, 0>;
+// Single-bit instructions
+def : ReadAdvance<ReadSingleBit, 0>;
+def : ReadAdvance<ReadSingleBitImm, 0>;
+
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedV;
+defm : UnsupportedSchedXsfvcp;
+defm : UnsupportedSchedZabha;
+defm : UnsupportedSchedZbc;
+defm : UnsupportedSchedZbkb;
+defm : UnsupportedSchedZbkx;
+defm : UnsupportedSchedZfa;
+defm : UnsupportedSchedZvk;
+defm : UnsupportedSchedSFB;
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index 75f4b977a98b0..b384a0187a1ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -302,32 +302,32 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: .cfi_offset s4, -40
; RV64X60-NEXT: li t0, 0
; RV64X60-NEXT: li t1, 0
-; RV64X60-NEXT: addi t2, a7, -1
-; RV64X60-NEXT: add t4, a0, a6
-; RV64X60-NEXT: add t5, a2, a6
-; RV64X60-NEXT: add t3, a4, a6
-; RV64X60-NEXT: zext.w s0, t2
-; RV64X60-NEXT: mul s1, a1, s0
-; RV64X60-NEXT: add t4, t4, s1
-; RV64X60-NEXT: mul s1, a3, s0
-; RV64X60-NEXT: add t5, t5, s1
+; RV64X60-NEXT: addi s1, a7, -1
+; RV64X60-NEXT: zext.w s1, s1
+; RV64X60-NEXT: mul t2, a1, s1
+; RV64X60-NEXT: mul t3, a3, s1
+; RV64X60-NEXT: mul t4, a5, s1
+; RV64X60-NEXT: add s1, a0, a6
+; RV64X60-NEXT: add s0, a2, a6
+; RV64X60-NEXT: add t5, a4, a6
+; RV64X60-NEXT: add s2, s1, t2
; RV64X60-NEXT: csrr t2, vlenb
-; RV64X60-NEXT: mul s1, a5, s0
-; RV64X60-NEXT: add t3, t3, s1
-; RV64X60-NEXT: sltu s1, a0, t5
-; RV64X60-NEXT: sltu s0, a2, t4
-; RV64X60-NEXT: and t6, s1, s0
+; RV64X60-NEXT: add t3, t3, s0
+; RV64X60-NEXT: or t6, a1, a3
+; RV64X60-NEXT: add t4, t4, t5
+; RV64X60-NEXT: sltu s0, a0, t3
+; RV64X60-NEXT: sltu s1, a2, s2
+; RV64X60-NEXT: and t5, s0, s1
+; RV64X60-NEXT: slli t3, t2, 1
+; RV64X60-NEXT: slti s1, t6, 0
+; RV64X60-NEXT: sltu s0, a0, t4
+; RV64X60-NEXT: or t4, t5, s1
+; RV64X60-NEXT: sltu s1, a4, s2
+; RV64X60-NEXT: and s0, s0, s1
+; RV64X60-NEXT: or s1, a1, a5
; RV64X60-NEXT: li t5, 32
-; RV64X60-NEXT: sltu s1, a0, t3
-; RV64X60-NEXT: sltu s0, a4, t4
-; RV64X60-NEXT: and t3, s1, s0
-; RV64X60-NEXT: or s1, a1, a3
; RV64X60-NEXT: slti s1, s1, 0
-; RV64X60-NEXT: or t4, t6, s1
-; RV64X60-NEXT: or s0, a1, a5
-; RV64X60-NEXT: slti s0, s0, 0
-; RV64X60-NEXT: or s0, t3, s0
-; RV64X60-NEXT: slli t3, t2, 1
+; RV64X60-NEXT: or s0, s0, s1
; RV64X60-NEXT: maxu s1, t3, t5
; RV64X60-NEXT: or s0, t4, s0
; RV64X60-NEXT: sltu s1, a6, s1
@@ -339,8 +339,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1
; RV64X60-NEXT: add t5, t5, a1
; RV64X60-NEXT: add a2, a2, a3
-; RV64X60-NEXT: add a4, a4, a5
; RV64X60-NEXT: addiw t1, t1, 1
+; RV64X60-NEXT: add a4, a4, a5
; RV64X60-NEXT: addi t0, t0, 1
; RV64X60-NEXT: beq t1, a7, .LBB0_11
; RV64X60-NEXT: .LBB0_4: # %for.cond1.preheader.us
@@ -367,10 +367,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: vl2r.v v8, (s2)
; RV64X60-NEXT: vl2r.v v10, (s3)
; RV64X60-NEXT: sub s1, s1, t3
-; RV64X60-NEXT: add s3, s3, t3
; RV64X60-NEXT: vaaddu.vv v8, v8, v10
; RV64X60-NEXT: vs2r.v v8, (s4)
; RV64X60-NEXT: add s4, s4, t3
+; RV64X60-NEXT: add s3, s3, t3
; RV64X60-NEXT: add s2, s2, t3
; RV64X60-NEXT: bnez s1, .LBB0_7
; RV64X60-NEXT: # %bb.8: # %middle.block
diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s
new file mode 100644
index 0000000000000..73109a78cd4b9
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s
@@ -0,0 +1,312 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva22u64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s
+
+# Zalrsc
+lr.w t0, (t1)
+lr.w.aq t1, (t2)
+lr.w.rl t2, (t3)
+lr.w.aqrl t3, (t4)
+sc.w t6, t5, (t4)
+sc.w.aq t5, t4, (t3)
+sc.w.rl t4, t3, (t2)
+sc.w.aqrl t3, t2, (t1)
+
+lr.d t0, (t1)
+lr.d.aq t1, (t2)
+lr.d.rl t2, (t3)
+lr.d.aqrl t3, (t4)
+sc.d t6, t5, (t4)
+sc.d.aq t5, t4, (t3)
+sc.d.rl t4, t3, (t2)
+sc.d.aqrl t3, t2, (t1)
+
+# Zaamo
+amoswap.w a4, ra, (s0)
+amoadd.w a1, a2, (a3)
+amoxor.w a2, a3, (a4)
+amoand.w a3, a4, (a5)
+amoor.w a4, a5, (a6)
+amomin.w a5, a6, (a7)
+amomax.w s7, s6, (s5)
+amominu.w s6, s5, (s4)
+amomaxu.w s5, s4, (s3)
+
+amoswap.w.aq a4, ra, (s0)
+amoadd.w.aq a1, a2, (a3)
+amoxor.w.aq a2, a3, (a4)
+amoand.w.aq a3, a4, (a5)
+amoor.w.aq a4, a5, (a6)
+amomin.w.aq a5, a6, (a7)
+amomax.w.aq s7, s6, (s5)
+amominu.w.aq s6, s5, (s4)
+amomaxu.w.aq s5, s4, (s3)
+
+amoswap.w.rl a4, ra, (s0)
+amoadd.w.rl a1, a2, (a3)
+amoxor.w.rl a2, a3, (a4)
+amoand.w.rl a3, a4, (a5)
+amoor.w.rl a4, a5, (a6)
+amomin.w.rl a5, a6, (a7)
+amomax.w.rl s7, s6, (s5)
+amominu.w.rl s6, s5, (s4)
+amomaxu.w.rl s5, s4, (s3)
+
+amoswap.w.aqrl a4, ra, (s0)
+amoadd.w.aqrl a1, a2, (a3)
+amoxor.w.aqrl a2, a3, (a4)
+amoand.w.aqrl a3, a4, (a5)
+amoor.w.aqrl a4, a5, (a6)
+amomin.w.aqrl a5, a6, (a7)
+amomax.w.aqrl s7, s6, (s5)
+amominu.w.aqrl s6, s5, (s4)
+amomaxu.w.aqrl s5, s4, (s3)
+
+amoswap.d a4, ra, (s0)
+amoadd.d a1, a2, (a3)
+amoxor.d a2, a3, (a4)
+amoand.d a3, a4, (a5)
+amoor.d a4, a5, (a6)
+amomin.d a5, a6, (a7)
+amomax.d s7, s6, (s5)
+amominu.d s6, s5, (s4)
+amomaxu.d s5, s4, (s3)
+
+amoswap.d.aq a4, ra, (s0)
+amoadd.d.aq a1, a2, (a3)
+amoxor.d.aq a2, a3, (a4)
+amoand.d.aq a3, a4, (a5)
+amoor.d.aq a4, a5, (a6)
+amomin.d.aq a5, a6, (a7)
+amomax.d.aq s7, s6, (s5)
+amominu.d.aq s6, s5, (s4)
+amomaxu.d.aq s5, s4, (s3)
+
+amoswap.d.rl a4, ra, (s0)
+amoadd.d.rl a1, a2, (a3)
+amoxor.d.rl a2, a3, (a4)
+amoand.d.rl a3, a4, (a5)
+amoor.d.rl a4, a5, (a6)
+amomin.d.rl a5, a6, (a7)
+amomax.d.rl s7, s6, (s5)
+amominu.d.rl s6, s5, (s4)
+amomaxu.d.rl s5, s4, (s3)
+
+amoswap.d.aqrl a4, ra, (s0)
+amoadd.d.aqrl a1, a2, (a3)
+amoxor.d.aqrl a2, a3, (a4)
+amoand.d.aqrl a3, a4, (a5)
+amoor.d.aqrl a4, a5, (a6)
+amomin.d.aqrl a5, a6, (a7)
+amomax.d.aqrl s7, s6, (s5)
+amominu.d.aqrl s6, s5, (s4)
+amomaxu.d.aqrl s5, s4, (s3)
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 88
+# CHECK-NEXT: Total Cycles: 86
+# CHECK-NEXT: Total uOps: 88
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.02
+# CHECK-NEXT: IPC: 1.02
+# CHECK-NEXT: Block RThroughput: 44.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * lr.w t0, (t1)
+# CHECK-NEXT: 1 5 0.50 * lr.w.aq t1, (t2)
+# CHECK-NEXT: 1 5 0.50 * lr.w.rl t2, (t3)
+# CHECK-NEXT: 1 5 0.50 * lr.w.aqrl t3, (t4)
+# CHECK-NEXT: 1 3 0.50 * sc.w t6, t5, (t4)
+# CHECK-NEXT: 1 3 0.50 * sc.w.aq t5, t4, (t3)
+# CHECK-NEXT: 1 3 0.50 * sc.w.rl t4, t3, (t2)
+# CHECK-NEXT: 1 3 0.50 * sc.w.aqrl t3, t2, (t1)
+# CHECK-NEXT: 1 5 0.50 * lr.d t0, (t1)
+# CHECK-NEXT: 1 5 0.50 * lr.d.aq t1, (t2)
+# CHECK-NEXT: 1 5 0.50 * lr.d.rl t2, (t3)
+# CHECK-NEXT: 1 5 0.50 * lr.d.aqrl t3, (t4)
+# CHECK-NEXT: 1 3 0.50 * sc.d t6, t5, (t4)
+# CHECK-NEXT: 1 3 0.50 * sc.d.aq t5, t4, (t3)
+# CHECK-NEXT: 1 3 0.50 * sc.d.rl t4, t3, (t2)
+# CHECK-NEXT: 1 3 0.50 * sc.d.aqrl t3, t2, (t1)
+# CHECK-NEXT: 1 5 0.50 * * amoswap.w a4, ra, (s0)
+# CHECK-NEXT: 1 5 0.50 * * amoadd.w a1, a2, (a3)
+# CHECK-NEXT: 1 5 0.50 * * amoxor.w a2, a3, (a4)
+# CHECK-NEXT: 1 5 0.50 * * amoand.w a3, a4, (a5)
+# CHECK-NEXT: 1 5 0.50 * * amoor.w a4, a5, (a6)
+# CHECK-NEXT: 1 5 0.50 * * amomin.w a5, a6, (a7)
+# CHECK-NEXT: 1 5 0.50 * * amomax.w s7, s6, (s5)
+# CHECK-NEXT: 1 5 0.50 * * amominu.w s6, s5, (s4)
+# CHECK-NEXT: 1 5 0.50 * * amomaxu.w s5, s4, (s3)
+# CHECK-NEXT: 1 5 0.50 * * amoswap.w.aq a4, ra, (s0)
+# CHECK-NEXT: 1 5 0.50 * * amoadd.w.aq a1, a2, (a3)
+# CHECK-NEXT: 1 5 0.50 * * amoxor.w.aq a2, a3, (a4)
+# CHECK-NEXT: 1 5 0.50 * * amoand.w.aq a3, a4, (a5)
+# CHECK-NEXT: 1 5 0.50 * * amoor.w.aq a4, a5, (a6)
+# CHECK-NEXT: 1 5 0.50 * * amomin.w.aq a5, a6, (a7)
+# CHECK-NEXT: 1 5 0.50 * * amomax.w.aq s7, s6, (s5)
+# CHECK-NEXT: 1 5 0.50 * * amominu.w.aq s6, s5, (s4)
+# CHECK-NEXT: 1 5 0.50 * * amomaxu.w.aq s5, s4, (s3)
+# CHECK-NEXT: 1 5 0.50 * * amoswap.w.rl a4, ra, (s0)
+# CHECK-NEXT: 1 5 0.50 * * amoadd.w.rl a1, a2, (a3)
+# CHECK-NEXT: 1 5 0.50 * * amoxor.w.rl a2, a3, (a4)
+# CHECK-NEXT: 1 5 0.50 * * amoand.w.rl a3, a4, (a5)
+# CHECK-NEXT: 1 5 0.50 * * amoor.w.rl a4, a5, (a6)
+# CHECK-NEXT: 1 5 0.50 * * amomin.w.rl a5, a6, (a7)
+# CHECK-NEXT: 1 5 0.50 * * amomax.w.rl s7, s6, (s5)
+# CHECK-NEXT: 1 5 0.50 * * amominu.w.rl s6, s5, (s4)
+# CHECK-NEXT: 1 5 0.50 * * amomaxu.w.rl s5, s4, (s3)
+# CHECK-NEXT: 1 5 0.50 * * amoswap.w.aqrl a4, ra, (s0)
+# CHECK-NEXT: 1 5 0.50 * * amoadd.w.aqrl a1, a2, (a3)
+# CHECK-NEXT: 1 5 0.50 * * amoxor.w.aqrl a2, a3, (a4)
+# CHECK-NEXT: 1 5 0.50 * * amoand.w.aqrl a3, a4, (a5)
+# CHECK-NEXT: 1 5 0.50 * * amoor.w.aqrl a4, a5, (a6)
+# CHECK-NEXT: 1 5 0.50 * * amomin.w.aqrl a5, a6, (a7)
+# CHECK-NEXT: 1 5 0.50 * * amomax.w.aqrl s7, s6, (s5)
+# CHECK-NEXT: 1 5 0.50 * * amominu.w.aqrl s6, s5, (s4)
+# CHECK-NEXT: 1 5 0.50 * * amomaxu.w.aqrl s5, s4, (s3)
+# CHECK-NEXT: 1 5 0.50 * * amoswap.d a4, ra, (s0)
+# CHECK-NEXT: 1 5 0.50 * * amoadd.d a1, a2, (a3)
+# CHECK-NEXT: 1 5 0.50 * * amoxor.d a2, a3, (a4)
+# CHECK-NEXT: 1 5 0.50 * * amoand.d a3, a4, (a5)
+# CHECK-NEXT: 1 5 0.50 * * amoor.d a4, a5, (a6)
+# CHECK-NEXT: 1 5 0.50 * * amomin.d a5, a6, (a7)
+# CHECK-NEXT: 1 5 0.50 * * amomax.d s7, s6, (s5)
+# CHECK-NEXT: 1 5 0.50 * * amominu.d s6, s5, (s4)
+# CHECK-NEXT: 1 5 0.50 * * amomaxu.d s5, s4, (s3)
+# CHECK-NEXT: 1 5 0.50 * * amoswap.d.aq a4, ra, (s0)
+# CHECK-NEXT: 1 5 0.50 * * amoadd.d.aq a1, a2, (a3)
+# CHECK-NEXT: 1 5 0.50 * * amoxor.d.aq a2, a3, (a4)
+# CHECK-NEXT: 1 5 0.50 * * amoand.d.aq a3, a4, (a5)
+# CHECK-NEXT: 1 5 0.50 * * amoor.d.aq a4, a5, (a6)
+# CHECK-NEXT: 1 5 0.50 * * amomin.d.aq a5, a6, (a7)
+# CHECK-NEXT: 1 5 0.50 * * amomax.d.aq s7, s6, (s5)
+# CHECK-NEXT: 1 5 0.50 * * amominu.d.aq s6, s5, (s4)
+# CHECK-NEXT: 1 5 0.50 * * amomaxu.d.aq s5, s4, (s3)
+# CHECK-NEXT: 1 5 0.50 * * amoswap.d.rl a4, ra, (s0)
+# CHECK-NEXT: 1 5 0.50 * * amoadd.d.rl a1, a2, (a3)
+# CHECK-NEXT: 1 5 0.50 * * amoxor.d.rl a2, a3, (a4)
+# CHECK-NEXT: 1 5 0.50 * * amoand.d.rl a3, a4, (a5)
+# CHECK-NEXT: 1 5 0.50 * * amoor.d.rl a4, a5, (a6)
+# CHECK-NEXT: 1 5 0.50 * * amomin.d.rl a5, a6, (a7)
+# CHECK-NEXT: 1 5 0.50 * * amomax.d.rl s7, s6, (s5)
+# CHECK-NEXT: 1 5 0.50 * * amominu.d.rl s6, s5, (s4)
+# CHECK-NEXT: 1 5 0.50 * * amomaxu.d.rl s5, s4, (s3)
+# CHECK-NEXT: 1 5 0.50 * * amoswap.d.aqrl a4, ra, (s0)
+# CHECK-NEXT: 1 5 0.50 * * amoadd.d.aqrl a1, a2, (a3)
+# CHECK-NEXT: 1 5 0.50 * * amoxor.d.aqrl a2, a3, (a4)
+# CHECK-NEXT: 1 5 0.50 * * amoand.d.aqrl a3, a4, (a5)
+# CHECK-NEXT: 1 5 0.50 * * amoor.d.aqrl a4, a5, (a6)
+# CHECK-NEXT: 1 5 0.50 * * amomin.d.aqrl a5, a6, (a7)
+# CHECK-NEXT: 1 5 0.50 * * amomax.d.aqrl s7, s6, (s5)
+# CHECK-NEXT: 1 5 0.50 * * amominu.d.aqrl s6, s5, (s4)
+# CHECK-NEXT: 1 5 0.50 * * amomaxu.d.aqrl s5, s4, (s3)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SMX60_FP
+# CHECK-NEXT: [1.0] - SMX60_IEU
+# CHECK-NEXT: [1.1] - SMX60_IEU
+# CHECK-NEXT: [2.0] - SMX60_LS
+# CHECK-NEXT: [2.1] - SMX60_LS
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1]
+# CHECK-NEXT: - - - 44.00 44.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] Instructions:
+# CHECK-NEXT: - - - - 1.00 lr.w t0, (t1)
+# CHECK-NEXT: - - - 1.00 - lr.w.aq t1, (t2)
+# CHECK-NEXT: - - - - 1.00 lr.w.rl t2, (t3)
+# CHECK-NEXT: - - - 1.00 - lr.w.aqrl t3, (t4)
+# CHECK-NEXT: - - - - 1.00 sc.w t6, t5, (t4)
+# CHECK-NEXT: - - - 1.00 - sc.w.aq t5, t4, (t3)
+# CHECK-NEXT: - - - - 1.00 sc.w.rl t4, t3, (t2)
+# CHECK-NEXT: - - - 1.00 - sc.w.aqrl t3, t2, (t1)
+# CHECK-NEXT: - - - - 1.00 lr.d t0, (t1)
+# CHECK-NEXT: - - - 1.00 - lr.d.aq t1, (t2)
+# CHECK-NEXT: - - - - 1.00 lr.d.rl t2, (t3)
+# CHECK-NEXT: - - - 1.00 - lr.d.aqrl t3, (t4)
+# CHECK-NEXT: - - - - 1.00 sc.d t6, t5, (t4)
+# CHECK-NEXT: - - - 1.00 - sc.d.aq t5, t4, (t3)
+# CHECK-NEXT: - - - - 1.00 sc.d.rl t4, t3, (t2)
+# CHECK-NEXT: - - - 1.00 - sc.d.aqrl t3, t2, (t1)
+# CHECK-NEXT: - - - - 1.00 amoswap.w a4, ra, (s0)
+# CHECK-NEXT: - - - 1.00 - amoadd.w a1, a2, (a3)
+# CHECK-NEXT: - - - - 1.00 amoxor.w a2, a3, (a4)
+# CHECK-NEXT: - - - 1.00 - amoand.w a3, a4, (a5)
+# CHECK-NEXT: - - - - 1.00 amoor.w a4, a5, (a6)
+# CHECK-NEXT: - - - 1.00 - amomin.w a5, a6, (a7)
+# CHECK-NEXT: - - - - 1.00 amomax.w s7, s6, (s5)
+# CHECK-NEXT: - - - 1.00 - amominu.w s6, s5, (s4)
+# CHECK-NEXT: - - - - 1.00 amomaxu.w s5, s4, (s3)
+# CHECK-NEXT: - - - 1.00 - amoswap.w.aq a4, ra, (s0)
+# CHECK-NEXT: - - - - 1.00 amoadd.w.aq a1, a2, (a3)
+# CHECK-NEXT: - - - 1.00 - amoxor.w.aq a2, a3, (a4)
+# CHECK-NEXT: - - - - 1.00 amoand.w.aq a3, a4, (a5)
+# CHECK-NEXT: - - - 1.00 - amoor.w.aq a4, a5, (a6)
+# CHECK-NEXT: - - - - 1.00 amomin.w.aq a5, a6, (a7)
+# CHECK-NEXT: - - - 1.00 - amomax.w.aq s7, s6, (s5)
+# CHECK-NEXT: - - - - 1.00 amominu.w.aq s6, s5, (s4)
+# CHECK-NEXT: - - - 1.00 - amomaxu.w.aq s5, s4, (s3)
+# CHECK-NEXT: - - - - 1.00 amoswap.w.rl a4, ra, (s0)
+# CHECK-NEXT: - - - 1.00 - amoadd.w.rl a1, a2, (a3)
+# CHECK-NEXT: - - - - 1.00 amoxor.w.rl a2, a3, (a4)
+# CHECK-NEXT: - - - 1.00 - amoand.w.rl a3, a4, (a5)
+# CHECK-NEXT: - - - - 1.00 amoor.w.rl a4, a5, (a6)
+# CHECK-NEXT: - - - 1.00 - amomin.w.rl a5, a6, (a7)
+# CHECK-NEXT: - - - - 1.00 amomax.w.rl s7, s6, (s5)
+# CHECK-NEXT: - - - 1.00 - amominu.w.rl s6, s5, (s4)
+# CHECK-NEXT: - - - - 1.00 amomaxu.w.rl s5, s4, (s3)
+# CHECK-NEXT: - - - 1.00 - amoswap.w.aqrl a4, ra, (s0)
+# CHECK-NEXT: - - - - 1.00 amoadd.w.aqrl a1, a2, (a3)
+# CHECK-NEXT: - - - 1.00 - amoxor.w.aqrl a2, a3, (a4)
+# CHECK-NEXT: - - - - 1.00 amoand.w.aqrl a3, a4, (a5)
+# CHECK-NEXT: - - - 1.00 - amoor.w.aqrl a4, a5, (a6)
+# CHECK-NEXT: - - - - 1.00 amomin.w.aqrl a5, a6, (a7)
+# CHECK-NEXT: - - - 1.00 - amomax.w.aqrl s7, s6, (s5)
+# CHECK-NEXT: - - - - 1.00 amominu.w.aqrl s6, s5, (s4)
+# CHECK-NEXT: - - - 1.00 - amomaxu.w.aqrl s5, s4, (s3)
+# CHECK-NEXT: - - - - 1.00 amoswap.d a4, ra, (s0)
+# CHECK-NEXT: - - - 1.00 - amoadd.d a1, a2, (a3)
+# CHECK-NEXT: - - - - 1.00 amoxor.d a2, a3, (a4)
+# CHECK-NEXT: - - - 1.00 - amoand.d a3, a4, (a5)
+# CHECK-NEXT: - - - - 1.00 amoor.d a4, a5, (a6)
+# CHECK-NEXT: - - - 1.00 - amomin.d a5, a6, (a7)
+# CHECK-NEXT: - - - - 1.00 amomax.d s7, s6, (s5)
+# CHECK-NEXT: - - - 1.00 - amominu.d s6, s5, (s4)
+# CHECK-NEXT: - - - - 1.00 amomaxu.d s5, s4, (s3)
+# CHECK-NEXT: - - - 1.00 - amoswap.d.aq a4, ra, (s0)
+# CHECK-NEXT: - - - - 1.00 amoadd.d.aq a1, a2, (a3)
+# CHECK-NEXT: - - - 1.00 - amoxor.d.aq a2, a3, (a4)
+# CHECK-NEXT: - - - - 1.00 amoand.d.aq a3, a4, (a5)
+# CHECK-NEXT: - - - 1.00 - amoor.d.aq a4, a5, (a6)
+# CHECK-NEXT: - - - - 1.00 amomin.d.aq a5, a6, (a7)
+# CHECK-NEXT: - - - 1.00 - amomax.d.aq s7, s6, (s5)
+# CHECK-NEXT: - - - - 1.00 amominu.d.aq s6, s5, (s4)
+# CHECK-NEXT: - - - 1.00 - amomaxu.d.aq s5, s4, (s3)
+# CHECK-NEXT: - - - - 1.00 amoswap.d.rl a4, ra, (s0)
+# CHECK-NEXT: - - - 1.00 - amoadd.d.rl a1, a2, (a3)
+# CHECK-NEXT: - - - - 1.00 amoxor.d.rl a2, a3, (a4)
+# CHECK-NEXT: - - - 1.00 - amoand.d.rl a3, a4, (a5)
+# CHECK-NEXT: - - - - 1.00 amoor.d.rl a4, a5, (a6)
+# CHECK-NEXT: - - - 1.00 - amomin.d.rl a5, a6, (a7)
+# CHECK-NEXT: - - - - 1.00 amomax.d.rl s7, s6, (s5)
+# CHECK-NEXT: - - - 1.00 - amominu.d.rl s6, s5, (s4)
+# CHECK-NEXT: - - - - 1.00 amomaxu.d.rl s5, s4, (s3)
+# CHECK-NEXT: - - - 1.00 - amoswap.d.aqrl a4, ra, (s0)
+# CHECK-NEXT: - - - - 1.00 amoadd.d.aqrl a1, a2, (a3)
+# CHECK-NEXT: - - - 1.00 - amoxor.d.aqrl a2, a3, (a4)
+# CHECK-NEXT: - - - - 1.00 amoand.d.aqrl a3, a4, (a5)
+# CHECK-NEXT: - - - 1.00 - amoor.d.aqrl a4, a5, (a6)
+# CHECK-NEXT: - - - - 1.00 amomin.d.aqrl a5, a6, (a7)
+# CHECK-NEXT: - - - 1.00 - amomax.d.aqrl s7, s6, (s5)
+# CHECK-NEXT: - - - - 1.00 amominu.d.aqrl s6, s5, (s4)
+# CHECK-NEXT: - - - 1.00 - amomaxu.d.aqrl s5, s4, (s3)
diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s
new file mode 100644
index 0000000000000..1cd6f2a91f2b7
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s
@@ -0,0 +1,334 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva22u64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s
+
+# Floating-Point Load and Store Instructions
+## Half-Precision
+flh ft0, 0(a0)
+fsh ft0, 0(a0)
+
+## Single-Precision
+flw ft0, 0(a0)
+fsw ft0, 0(a0)
+
+## Double-Precision
+fld ft0, 0(a0)
+fsd ft0, 0(a0)
+
+# Floating-Point Computational Instructions
+## Half-Precision
+fadd.h f26, f27, f28
+fsub.h f29, f30, f31
+fmul.h ft0, ft1, ft2
+fdiv.h ft3, ft4, ft5
+fsqrt.h ft6, ft7
+fmin.h fa5, fa6, fa7
+fmax.h fs2, fs3, fs4
+fmadd.h f10, f11, f12, f31
+fmsub.h f14, f15, f16, f17
+fnmsub.h f18, f19, f20, f21
+fnmadd.h f22, f23, f24, f25
+
+## Single-Precision
+fadd.s f26, f27, f28
+fsub.s f29, f30, f31
+fmul.s ft0, ft1, ft2
+fdiv.s ft3, ft4, ft5
+fsqrt.s ft6, ft7
+fmin.s fa5, fa6, fa7
+fmax.s fs2, fs3, fs4
+fmadd.s f10, f11, f12, f31
+fmsub.s f14, f15, f16, f17
+fnmsub.s f18, f19, f20, f21
+fnmadd.s f22, f23, f24, f25
+
+## Double-Precision
+fadd.d f26, f27, f28
+fsub.d f29, f30, f31
+fmul.d ft0, ft1, ft2
+fdiv.d ft3, ft4, ft5
+fsqrt.d ft6, ft7
+fmin.d fa5, fa6, fa7
+fmax.d fs2, fs3, fs4
+fmadd.d f10, f11, f12, f31
+fmsub.d f14, f15, f16, f17
+fnmsub.d f18, f19, f20, f21
+fnmadd.d f22, f23, f24, f25
+
+# Floating-Point Conversion and Move Instructions
+## Half-Precision
+fmv.x.h a2, fs7
+fmv.h.x ft1, a6
+
+fcvt.s.h fa0, ft0
+fcvt.s.h fa0, ft0, rup
+
+fcvt.h.s ft2, fa2
+fcvt.d.h fa0, ft0
+
+fcvt.d.h fa0, ft0, rup
+fcvt.h.d ft2, fa2
+
+## Single-Precision
+fcvt.w.s a0, fs5
+fcvt.wu.s a1, fs6
+fcvt.s.w ft11, a4
+fcvt.s.wu ft0, a5
+
+fcvt.l.s a0, ft0
+fcvt.lu.s a1, ft1
+fcvt.s.l ft2, a2
+fcvt.s.lu ft3, a3
+
+fmv.x.w a2, fs7
+fmv.w.x ft1, a6
+
+fsgnj.s fs1, fa0, fa1
+fsgnjn.s fa1, fa3, fa4
+
+## Double-Precision
+fcvt.wu.d a4, ft11
+fcvt.w.d a4, ft11
+fcvt.d.w ft0, a5
+fcvt.d.wu ft1, a6
+
+fcvt.s.d fs5, fs6
+fcvt.d.s fs7, fs8
+
+fcvt.l.d a0, ft0
+fcvt.lu.d a1, ft1
+fcvt.d.l ft3, a3
+fcvt.d.lu ft4, a4
+
+fmv.x.d a2, ft2
+fmv.d.x ft5, a5
+
+fsgnj.d fs1, fa0, fa1
+fsgnjn.d fa1, fa3, fa4
+
+# Floating-Point Compare Instructions
+## Half-Precision
+feq.h a1, fs8, fs9
+flt.h a2, fs10, fs11
+fle.h a3, ft8, ft9
+
+## Single-Precision
+feq.s a1, fs8, fs9
+flt.s a2, fs10, fs11
+fle.s a3, ft8, ft9
+
+## Double-Precision
+feq.d a1, fs8, fs9
+flt.d a2, fs10, fs11
+fle.d a3, ft8, ft9
+
+# Floating-Point Classify Instruction
+## Half-Precision (NOTE(review): the instruction below is fclass.s, not fclass.h; confirm intent)
+fclass.s a3, ft10
+## Single-Precision
+fclass.s a3, ft10
+## Double-Precision
+fclass.d a3, ft10
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 85
+# CHECK-NEXT: Total Cycles: 138
+# CHECK-NEXT: Total uOps: 85
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 105.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * flh ft0, 0(a0)
+# CHECK-NEXT: 1 3 0.50 * fsh ft0, 0(a0)
+# CHECK-NEXT: 1 5 0.50 * flw ft0, 0(a0)
+# CHECK-NEXT: 1 3 0.50 * fsw ft0, 0(a0)
+# CHECK-NEXT: 1 5 0.50 * fld ft0, 0(a0)
+# CHECK-NEXT: 1 3 0.50 * fsd ft0, 0(a0)
+# CHECK-NEXT: 1 3 1.00 fadd.h fs10, fs11, ft8
+# CHECK-NEXT: 1 3 1.00 fsub.h ft9, ft10, ft11
+# CHECK-NEXT: 1 3 1.00 fmul.h ft0, ft1, ft2
+# CHECK-NEXT: 1 7 7.00 fdiv.h ft3, ft4, ft5
+# CHECK-NEXT: 1 7 7.00 fsqrt.h ft6, ft7
+# CHECK-NEXT: 1 3 1.00 fmin.h fa5, fa6, fa7
+# CHECK-NEXT: 1 3 1.00 fmax.h fs2, fs3, fs4
+# CHECK-NEXT: 1 4 1.00 fmadd.h fa0, fa1, fa2, ft11
+# CHECK-NEXT: 1 4 1.00 fmsub.h fa4, fa5, fa6, fa7
+# CHECK-NEXT: 1 4 1.00 fnmsub.h fs2, fs3, fs4, fs5
+# CHECK-NEXT: 1 4 1.00 fnmadd.h fs6, fs7, fs8, fs9
+# CHECK-NEXT: 1 3 1.00 fadd.s fs10, fs11, ft8
+# CHECK-NEXT: 1 3 1.00 fsub.s ft9, ft10, ft11
+# CHECK-NEXT: 1 4 1.00 fmul.s ft0, ft1, ft2
+# CHECK-NEXT: 1 10 10.00 fdiv.s ft3, ft4, ft5
+# CHECK-NEXT: 1 10 10.00 fsqrt.s ft6, ft7
+# CHECK-NEXT: 1 3 1.00 fmin.s fa5, fa6, fa7
+# CHECK-NEXT: 1 3 1.00 fmax.s fs2, fs3, fs4
+# CHECK-NEXT: 1 5 1.00 fmadd.s fa0, fa1, fa2, ft11
+# CHECK-NEXT: 1 5 1.00 fmsub.s fa4, fa5, fa6, fa7
+# CHECK-NEXT: 1 5 1.00 fnmsub.s fs2, fs3, fs4, fs5
+# CHECK-NEXT: 1 5 1.00 fnmadd.s fs6, fs7, fs8, fs9
+# CHECK-NEXT: 1 4 1.00 fadd.d fs10, fs11, ft8
+# CHECK-NEXT: 1 4 1.00 fsub.d ft9, ft10, ft11
+# CHECK-NEXT: 1 4 1.00 fmul.d ft0, ft1, ft2
+# CHECK-NEXT: 1 10 10.00 fdiv.d ft3, ft4, ft5
+# CHECK-NEXT: 1 10 10.00 fsqrt.d ft6, ft7
+# CHECK-NEXT: 1 3 1.00 fmin.d fa5, fa6, fa7
+# CHECK-NEXT: 1 3 1.00 fmax.d fs2, fs3, fs4
+# CHECK-NEXT: 1 5 1.00 fmadd.d fa0, fa1, fa2, ft11
+# CHECK-NEXT: 1 5 1.00 fmsub.d fa4, fa5, fa6, fa7
+# CHECK-NEXT: 1 5 1.00 fnmsub.d fs2, fs3, fs4, fs5
+# CHECK-NEXT: 1 5 1.00 fnmadd.d fs6, fs7, fs8, fs9
+# CHECK-NEXT: 1 2 0.50 fmv.x.h a2, fs7
+# CHECK-NEXT: 1 2 0.50 fmv.h.x ft1, a6
+# CHECK-NEXT: 1 3 1.00 fcvt.s.h fa0, ft0
+# CHECK-NEXT: 1 3 1.00 fcvt.s.h fa0, ft0, rup
+# CHECK-NEXT: 1 3 1.00 fcvt.h.s ft2, fa2
+# CHECK-NEXT: 1 3 1.00 fcvt.d.h fa0, ft0
+# CHECK-NEXT: 1 3 1.00 fcvt.d.h fa0, ft0, rup
+# CHECK-NEXT: 1 3 1.00 fcvt.h.d ft2, fa2
+# CHECK-NEXT: 1 3 0.50 fcvt.w.s a0, fs5
+# CHECK-NEXT: 1 3 0.50 fcvt.wu.s a1, fs6
+# CHECK-NEXT: 1 3 0.50 fcvt.s.w ft11, a4
+# CHECK-NEXT: 1 3 0.50 fcvt.s.wu ft0, a5
+# CHECK-NEXT: 1 3 0.50 fcvt.l.s a0, ft0
+# CHECK-NEXT: 1 3 0.50 fcvt.lu.s a1, ft1
+# CHECK-NEXT: 1 3 0.50 fcvt.s.l ft2, a2
+# CHECK-NEXT: 1 3 0.50 fcvt.s.lu ft3, a3
+# CHECK-NEXT: 1 2 0.50 fmv.x.w a2, fs7
+# CHECK-NEXT: 1 2 0.50 fmv.w.x ft1, a6
+# CHECK-NEXT: 1 3 1.00 fsgnj.s fs1, fa0, fa1
+# CHECK-NEXT: 1 3 1.00 fsgnjn.s fa1, fa3, fa4
+# CHECK-NEXT: 1 3 0.50 fcvt.wu.d a4, ft11
+# CHECK-NEXT: 1 3 0.50 fcvt.w.d a4, ft11
+# CHECK-NEXT: 1 3 0.50 fcvt.d.w ft0, a5
+# CHECK-NEXT: 1 3 0.50 fcvt.d.wu ft1, a6
+# CHECK-NEXT: 1 3 1.00 fcvt.s.d fs5, fs6
+# CHECK-NEXT: 1 3 1.00 fcvt.d.s fs7, fs8
+# CHECK-NEXT: 1 3 0.50 fcvt.l.d a0, ft0
+# CHECK-NEXT: 1 3 0.50 fcvt.lu.d a1, ft1
+# CHECK-NEXT: 1 3 0.50 fcvt.d.l ft3, a3
+# CHECK-NEXT: 1 3 0.50 fcvt.d.lu ft4, a4
+# CHECK-NEXT: 1 2 0.50 fmv.x.d a2, ft2
+# CHECK-NEXT: 1 2 0.50 fmv.d.x ft5, a5
+# CHECK-NEXT: 1 3 1.00 fsgnj.d fs1, fa0, fa1
+# CHECK-NEXT: 1 3 1.00 fsgnjn.d fa1, fa3, fa4
+# CHECK-NEXT: 1 4 1.00 feq.h a1, fs8, fs9
+# CHECK-NEXT: 1 4 1.00 flt.h a2, fs10, fs11
+# CHECK-NEXT: 1 4 1.00 fle.h a3, ft8, ft9
+# CHECK-NEXT: 1 4 1.00 feq.s a1, fs8, fs9
+# CHECK-NEXT: 1 4 1.00 flt.s a2, fs10, fs11
+# CHECK-NEXT: 1 4 1.00 fle.s a3, ft8, ft9
+# CHECK-NEXT: 1 4 1.00 feq.d a1, fs8, fs9
+# CHECK-NEXT: 1 4 1.00 flt.d a2, fs10, fs11
+# CHECK-NEXT: 1 4 1.00 fle.d a3, ft8, ft9
+# CHECK-NEXT: 1 2 1.00 fclass.s a3, ft10
+# CHECK-NEXT: 1 2 1.00 fclass.s a3, ft10
+# CHECK-NEXT: 1 2 1.00 fclass.d a3, ft10
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SMX60_FP
+# CHECK-NEXT: [1.0] - SMX60_IEU
+# CHECK-NEXT: [1.1] - SMX60_IEU
+# CHECK-NEXT: [2.0] - SMX60_LS
+# CHECK-NEXT: [2.1] - SMX60_LS
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1]
+# CHECK-NEXT: 105.00 11.00 11.00 3.00 3.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] Instructions:
+# CHECK-NEXT: - - - - 1.00 flh ft0, 0(a0)
+# CHECK-NEXT: - - - 1.00 - fsh ft0, 0(a0)
+# CHECK-NEXT: - - - - 1.00 flw ft0, 0(a0)
+# CHECK-NEXT: - - - 1.00 - fsw ft0, 0(a0)
+# CHECK-NEXT: - - - - 1.00 fld ft0, 0(a0)
+# CHECK-NEXT: - - - 1.00 - fsd ft0, 0(a0)
+# CHECK-NEXT: 1.00 - - - - fadd.h fs10, fs11, ft8
+# CHECK-NEXT: 1.00 - - - - fsub.h ft9, ft10, ft11
+# CHECK-NEXT: 1.00 - - - - fmul.h ft0, ft1, ft2
+# CHECK-NEXT: 7.00 - - - - fdiv.h ft3, ft4, ft5
+# CHECK-NEXT: 7.00 - - - - fsqrt.h ft6, ft7
+# CHECK-NEXT: 1.00 - - - - fmin.h fa5, fa6, fa7
+# CHECK-NEXT: 1.00 - - - - fmax.h fs2, fs3, fs4
+# CHECK-NEXT: 1.00 - - - - fmadd.h fa0, fa1, fa2, ft11
+# CHECK-NEXT: 1.00 - - - - fmsub.h fa4, fa5, fa6, fa7
+# CHECK-NEXT: 1.00 - - - - fnmsub.h fs2, fs3, fs4, fs5
+# CHECK-NEXT: 1.00 - - - - fnmadd.h fs6, fs7, fs8, fs9
+# CHECK-NEXT: 1.00 - - - - fadd.s fs10, fs11, ft8
+# CHECK-NEXT: 1.00 - - - - fsub.s ft9, ft10, ft11
+# CHECK-NEXT: 1.00 - - - - fmul.s ft0, ft1, ft2
+# CHECK-NEXT: 10.00 - - - - fdiv.s ft3, ft4, ft5
+# CHECK-NEXT: 10.00 - - - - fsqrt.s ft6, ft7
+# CHECK-NEXT: 1.00 - - - - fmin.s fa5, fa6, fa7
+# CHECK-NEXT: 1.00 - - - - fmax.s fs2, fs3, fs4
+# CHECK-NEXT: 1.00 - - - - fmadd.s fa0, fa1, fa2, ft11
+# CHECK-NEXT: 1.00 - - - - fmsub.s fa4, fa5, fa6, fa7
+# CHECK-NEXT: 1.00 - - - - fnmsub.s fs2, fs3, fs4, fs5
+# CHECK-NEXT: 1.00 - - - - fnmadd.s fs6, fs7, fs8, fs9
+# CHECK-NEXT: 1.00 - - - - fadd.d fs10, fs11, ft8
+# CHECK-NEXT: 1.00 - - - - fsub.d ft9, ft10, ft11
+# CHECK-NEXT: 1.00 - - - - fmul.d ft0, ft1, ft2
+# CHECK-NEXT: 10.00 - - - - fdiv.d ft3, ft4, ft5
+# CHECK-NEXT: 10.00 - - - - fsqrt.d ft6, ft7
+# CHECK-NEXT: 1.00 - - - - fmin.d fa5, fa6, fa7
+# CHECK-NEXT: 1.00 - - - - fmax.d fs2, fs3, fs4
+# CHECK-NEXT: 1.00 - - - - fmadd.d fa0, fa1, fa2, ft11
+# CHECK-NEXT: 1.00 - - - - fmsub.d fa4, fa5, fa6, fa7
+# CHECK-NEXT: 1.00 - - - - fnmsub.d fs2, fs3, fs4, fs5
+# CHECK-NEXT: 1.00 - - - - fnmadd.d fs6, fs7, fs8, fs9
+# CHECK-NEXT: - - 1.00 - - fmv.x.h a2, fs7
+# CHECK-NEXT: - 1.00 - - - fmv.h.x ft1, a6
+# CHECK-NEXT: 1.00 - - - - fcvt.s.h fa0, ft0
+# CHECK-NEXT: 1.00 - - - - fcvt.s.h fa0, ft0, rup
+# CHECK-NEXT: 1.00 - - - - fcvt.h.s ft2, fa2
+# CHECK-NEXT: 1.00 - - - - fcvt.d.h fa0, ft0
+# CHECK-NEXT: 1.00 - - - - fcvt.d.h fa0, ft0, rup
+# CHECK-NEXT: 1.00 - - - - fcvt.h.d ft2, fa2
+# CHECK-NEXT: - - 1.00 - - fcvt.w.s a0, fs5
+# CHECK-NEXT: - 1.00 - - - fcvt.wu.s a1, fs6
+# CHECK-NEXT: - - 1.00 - - fcvt.s.w ft11, a4
+# CHECK-NEXT: - 1.00 - - - fcvt.s.wu ft0, a5
+# CHECK-NEXT: - - 1.00 - - fcvt.l.s a0, ft0
+# CHECK-NEXT: - 1.00 - - - fcvt.lu.s a1, ft1
+# CHECK-NEXT: - - 1.00 - - fcvt.s.l ft2, a2
+# CHECK-NEXT: - 1.00 - - - fcvt.s.lu ft3, a3
+# CHECK-NEXT: - - 1.00 - - fmv.x.w a2, fs7
+# CHECK-NEXT: - 1.00 - - - fmv.w.x ft1, a6
+# CHECK-NEXT: 1.00 - - - - fsgnj.s fs1, fa0, fa1
+# CHECK-NEXT: 1.00 - - - - fsgnjn.s fa1, fa3, fa4
+# CHECK-NEXT: - - 1.00 - - fcvt.wu.d a4, ft11
+# CHECK-NEXT: - 1.00 - - - fcvt.w.d a4, ft11
+# CHECK-NEXT: - - 1.00 - - fcvt.d.w ft0, a5
+# CHECK-NEXT: - 1.00 - - - fcvt.d.wu ft1, a6
+# CHECK-NEXT: 1.00 - - - - fcvt.s.d fs5, fs6
+# CHECK-NEXT: 1.00 - - - - fcvt.d.s fs7, fs8
+# CHECK-NEXT: - - 1.00 - - fcvt.l.d a0, ft0
+# CHECK-NEXT: - 1.00 - - - fcvt.lu.d a1, ft1
+# CHECK-NEXT: - - 1.00 - - fcvt.d.l ft3, a3
+# CHECK-NEXT: - 1.00 - - - fcvt.d.lu ft4, a4
+# CHECK-NEXT: - - 1.00 - - fmv.x.d a2, ft2
+# CHECK-NEXT: - 1.00 - - - fmv.d.x ft5, a5
+# CHECK-NEXT: 1.00 - - - - fsgnj.d fs1, fa0, fa1
+# CHECK-NEXT: 1.00 - - - - fsgnjn.d fa1, fa3, fa4
+# CHECK-NEXT: 1.00 - - - - feq.h a1, fs8, fs9
+# CHECK-NEXT: 1.00 - - - - flt.h a2, fs10, fs11
+# CHECK-NEXT: 1.00 - - - - fle.h a3, ft8, ft9
+# CHECK-NEXT: 1.00 - - - - feq.s a1, fs8, fs9
+# CHECK-NEXT: 1.00 - - - - flt.s a2, fs10, fs11
+# CHECK-NEXT: 1.00 - - - - fle.s a3, ft8, ft9
+# CHECK-NEXT: 1.00 - - - - feq.d a1, fs8, fs9
+# CHECK-NEXT: 1.00 - - - - flt.d a2, fs10, fs11
+# CHECK-NEXT: 1.00 - - - - fle.d a3, ft8, ft9
+# CHECK-NEXT: 1.00 - - - - fclass.s a3, ft10
+# CHECK-NEXT: 1.00 - - - - fclass.s a3, ft10
+# CHECK-NEXT: 1.00 - - - - fclass.d a3, ft10
diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s
new file mode 100644
index 0000000000000..1c9d57a5b553f
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s
@@ -0,0 +1,420 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva22u64 -mcpu=spacemit-x60 -iterations=1 < %s | FileCheck %s
+
+# Integer Register-Immediate Instructions
+addi a0, a0, 1
+addiw a0, a0, 1
+slti a0, a0, 1
+sltiu a0, a0, 1
+
+andi a0, a0, 1
+ori a0, a0, 1
+xori a0, a0, 1
+
+slli a0, a0, 1
+srli a0, a0, 1
+srai a0, a0, 1
+slliw a0, a0, 1
+srliw a0, a0, 1
+sraiw a0, a0, 1
+
+lui a0, 1
+auipc a1, 1
+
+# Integer Register-Register Operations
+add a0, a0, a1
+addw a0, a0, a0
+slt a0, a0, a0
+sltu a0, a0, a0
+
+and a0, a0, a0
+or a0, a0, a0
+xor a0, a0, a0
+
+sll a0, a0, a0
+srl a0, a0, a0
+sra a0, a0, a0
+sllw a0, a0, a0
+srlw a0, a0, a0
+sraw a0, a0, a0
+
+sub a0, a0, a0
+subw a0, a0, a0
+
+# Control Transfer Instructions
+
+## Unconditional Jumps and Conditional Branches
+jal a0, 1f
+1:
+jalr a0
+beq a0, a0, 1f
+1:
+bne a0, a0, 1f
+1:
+blt a0, a0, 1f
+1:
+bltu a0, a0, 1f
+1:
+bge a0, a0, 1f
+1:
+bgeu a0, a0, 1f
+1:
+add a0, a0, a0
+
+# Load and Store Instructions
+lb t0, 0(a0)
+lbu t0, 0(a0)
+lh t0, 0(a0)
+lhu t0, 0(a0)
+lw t0, 0(a0)
+lwu t0, 0(a0)
+ld t0, 0(a0)
+
+sb t0, 0(a0)
+sh t0, 0(a0)
+sw t0, 0(a0)
+sd t0, 0(a0)
+
+# Multiply/Division
+mul a0, a0, a0
+mulh a0, a0, a0
+mulhu a0, a0, a0
+mulhsu a0, a0, a0
+mulw a0, a0, a0
+div a0, a1, a2
+divu a0, a1, a2
+rem a0, a1, a2
+remu a0, a1, a2
+divw a0, a1, a2
+divuw a0, a1, a2
+remw a0, a1, a2
+remuw a0, a1, a2
+
+# Zicsr
+csrrw t0, 0xfff, t1
+csrrs s3, 0x001, s5
+csrrc sp, 0x000, ra
+csrrwi a5, 0x000, 0
+csrrsi t2, 0xfff, 31
+csrrci t1, 0x140, 5
+
+# Zicond
+czero.eqz a0, a1, a2
+czero.nez a0, a1, a2
+
+# Zicond (repeated: same two instructions as the Zicond block above)
+czero.eqz a0, a1, a2
+czero.nez a0, a1, a2
+
+# Zba
+add.uw a0, a0, a0
+slli.uw a0, a0, 1
+sh1add.uw a0, a0, a0
+sh2add.uw a0, a0, a0
+sh3add.uw a0, a0, a0
+sh1add a0, a0, a0
+sh2add a0, a0, a0
+sh3add a0, a0, a0
+
+# Zbb
+andn a0, a0, a0
+orn a0, a0, a0
+xnor a0, a0, a0
+
+clz a0, a0
+clzw a0, a0
+ctz a0, a0
+ctzw a0, a0
+
+cpop a0, a0
+cpopw a0, a0
+
+min a0, a0, a0
+minu a0, a0, a0
+max a0, a0, a0
+maxu a0, a0, a0
+
+sext.b a0, a0
+sext.h a0, a0
+zext.h a0, a0
+
+rol a0, a0, a0
+rolw a0, a0, a0
+ror a0, a0, a0
+rorw a0, a0, a0
+rori a0, a0, 1
+roriw a0, a0, 1
+
+orc.b a0, a0
+
+rev8 a0, a0
+
+# Zbs
+bclr a0, a1, a2
+bclri a0, a1, 1
+bext a0, a1, a2
+bexti a0, a1, 1
+binv a0, a1, a2
+binvi a0, a1, 1
+bset a0, a1, a2
+bseti a0, a1, 1
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 113
+# CHECK-NEXT: Total Cycles: 320
+# CHECK-NEXT: Total uOps: 113
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.35
+# CHECK-NEXT: IPC: 0.35
+# CHECK-NEXT: Block RThroughput: 56.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 addi a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 addiw a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 slti a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 seqz a0, a0
+# CHECK-NEXT: 1 1 0.50 andi a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 ori a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 xori a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 slli a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 srli a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 srai a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 slliw a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 srliw a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 sraiw a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 lui a0, 1
+# CHECK-NEXT: 1 1 0.50 auipc a1, 1
+# CHECK-NEXT: 1 1 0.50 add a0, a0, a1
+# CHECK-NEXT: 1 1 0.50 addw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 slt a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sltu a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 and a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 or a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 xor a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sll a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 srl a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sra a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sllw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 srlw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sraw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sub a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 subw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 jal a0, .Ltmp0
+# CHECK-NEXT: 1 1 0.50 jalr a0
+# CHECK-NEXT: 1 1 0.50 beq a0, a0, .Ltmp1
+# CHECK-NEXT: 1 1 0.50 bne a0, a0, .Ltmp2
+# CHECK-NEXT: 1 1 0.50 blt a0, a0, .Ltmp3
+# CHECK-NEXT: 1 1 0.50 bltu a0, a0, .Ltmp4
+# CHECK-NEXT: 1 1 0.50 bge a0, a0, .Ltmp5
+# CHECK-NEXT: 1 1 0.50 bgeu a0, a0, .Ltmp6
+# CHECK-NEXT: 1 1 0.50 add a0, a0, a0
+# CHECK-NEXT: 1 5 0.50 * lb t0, 0(a0)
+# CHECK-NEXT: 1 5 0.50 * lbu t0, 0(a0)
+# CHECK-NEXT: 1 5 0.50 * lh t0, 0(a0)
+# CHECK-NEXT: 1 5 0.50 * lhu t0, 0(a0)
+# CHECK-NEXT: 1 5 0.50 * lw t0, 0(a0)
+# CHECK-NEXT: 1 5 0.50 * lwu t0, 0(a0)
+# CHECK-NEXT: 1 5 0.50 * ld t0, 0(a0)
+# CHECK-NEXT: 1 3 0.50 * sb t0, 0(a0)
+# CHECK-NEXT: 1 3 0.50 * sh t0, 0(a0)
+# CHECK-NEXT: 1 3 0.50 * sw t0, 0(a0)
+# CHECK-NEXT: 1 3 0.50 * sd t0, 0(a0)
+# CHECK-NEXT: 1 4 0.50 mul a0, a0, a0
+# CHECK-NEXT: 1 4 0.50 mulh a0, a0, a0
+# CHECK-NEXT: 1 4 0.50 mulhu a0, a0, a0
+# CHECK-NEXT: 1 4 0.50 mulhsu a0, a0, a0
+# CHECK-NEXT: 1 4 0.50 mulw a0, a0, a0
+# CHECK-NEXT: 1 20 0.50 div a0, a1, a2
+# CHECK-NEXT: 1 20 0.50 divu a0, a1, a2
+# CHECK-NEXT: 1 20 0.50 rem a0, a1, a2
+# CHECK-NEXT: 1 20 0.50 remu a0, a1, a2
+# CHECK-NEXT: 1 12 0.50 divw a0, a1, a2
+# CHECK-NEXT: 1 12 0.50 divuw a0, a1, a2
+# CHECK-NEXT: 1 12 0.50 remw a0, a1, a2
+# CHECK-NEXT: 1 12 0.50 remuw a0, a1, a2
+# CHECK-NEXT: 1 1 0.50 U csrrw t0, 4095, t1
+# CHECK-NEXT: 1 1 0.50 U csrrs s3, fflags, s5
+# CHECK-NEXT: 1 1 0.50 U csrrc sp, 0, ra
+# CHECK-NEXT: 1 1 0.50 U csrrwi a5, 0, 0
+# CHECK-NEXT: 1 1 0.50 U csrrsi t2, 4095, 31
+# CHECK-NEXT: 1 1 0.50 U csrrci t1, sscratch, 5
+# CHECK-NEXT: 1 1 0.50 czero.eqz a0, a1, a2
+# CHECK-NEXT: 1 1 0.50 czero.nez a0, a1, a2
+# CHECK-NEXT: 1 1 0.50 czero.eqz a0, a1, a2
+# CHECK-NEXT: 1 1 0.50 czero.nez a0, a1, a2
+# CHECK-NEXT: 1 1 0.50 add.uw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 slli.uw a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 sh1add.uw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sh2add.uw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sh3add.uw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sh1add a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sh2add a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sh3add a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 andn a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 orn a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 xnor a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 clz a0, a0
+# CHECK-NEXT: 1 1 0.50 clzw a0, a0
+# CHECK-NEXT: 1 1 0.50 ctz a0, a0
+# CHECK-NEXT: 1 1 0.50 ctzw a0, a0
+# CHECK-NEXT: 1 1 0.50 cpop a0, a0
+# CHECK-NEXT: 1 1 0.50 cpopw a0, a0
+# CHECK-NEXT: 1 1 0.50 min a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 minu a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 max a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 maxu a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sext.b a0, a0
+# CHECK-NEXT: 1 1 0.50 sext.h a0, a0
+# CHECK-NEXT: 1 1 0.50 zext.h a0, a0
+# CHECK-NEXT: 1 1 0.50 rol a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 rolw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 ror a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 rorw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 rori a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 roriw a0, a0, 1
+# CHECK-NEXT: 1 1 0.50 orc.b a0, a0
+# CHECK-NEXT: 1 1 0.50 rev8 a0, a0
+# CHECK-NEXT: 1 1 0.50 bclr a0, a1, a2
+# CHECK-NEXT: 1 1 0.50 bclri a0, a1, 1
+# CHECK-NEXT: 1 1 0.50 bext a0, a1, a2
+# CHECK-NEXT: 1 1 0.50 bexti a0, a1, 1
+# CHECK-NEXT: 1 1 0.50 binv a0, a1, a2
+# CHECK-NEXT: 1 1 0.50 binvi a0, a1, 1
+# CHECK-NEXT: 1 1 0.50 bset a0, a1, a2
+# CHECK-NEXT: 1 1 0.50 bseti a0, a1, 1
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SMX60_FP
+# CHECK-NEXT: [1.0] - SMX60_IEU
+# CHECK-NEXT: [1.1] - SMX60_IEU
+# CHECK-NEXT: [2.0] - SMX60_LS
+# CHECK-NEXT: [2.1] - SMX60_LS
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1]
+# CHECK-NEXT: - 51.00 51.00 5.00 6.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] Instructions:
+# CHECK-NEXT: - - 1.00 - - addi a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - addiw a0, a0, 1
+# CHECK-NEXT: - - 1.00 - - slti a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - seqz a0, a0
+# CHECK-NEXT: - - 1.00 - - andi a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - ori a0, a0, 1
+# CHECK-NEXT: - - 1.00 - - xori a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - slli a0, a0, 1
+# CHECK-NEXT: - - 1.00 - - srli a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - srai a0, a0, 1
+# CHECK-NEXT: - - 1.00 - - slliw a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - srliw a0, a0, 1
+# CHECK-NEXT: - - 1.00 - - sraiw a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - lui a0, 1
+# CHECK-NEXT: - - 1.00 - - auipc a1, 1
+# CHECK-NEXT: - 1.00 - - - add a0, a0, a1
+# CHECK-NEXT: - - 1.00 - - addw a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - slt a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - sltu a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - and a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - or a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - xor a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - sll a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - srl a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - sra a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - sllw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - srlw a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - sraw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - sub a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - subw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - jal a0, .Ltmp0
+# CHECK-NEXT: - 1.00 - - - jalr a0
+# CHECK-NEXT: - - 1.00 - - beq a0, a0, .Ltmp1
+# CHECK-NEXT: - 1.00 - - - bne a0, a0, .Ltmp2
+# CHECK-NEXT: - - 1.00 - - blt a0, a0, .Ltmp3
+# CHECK-NEXT: - 1.00 - - - bltu a0, a0, .Ltmp4
+# CHECK-NEXT: - - 1.00 - - bge a0, a0, .Ltmp5
+# CHECK-NEXT: - 1.00 - - - bgeu a0, a0, .Ltmp6
+# CHECK-NEXT: - - 1.00 - - add a0, a0, a0
+# CHECK-NEXT: - - - - 1.00 lb t0, 0(a0)
+# CHECK-NEXT: - - - 1.00 - lbu t0, 0(a0)
+# CHECK-NEXT: - - - - 1.00 lh t0, 0(a0)
+# CHECK-NEXT: - - - 1.00 - lhu t0, 0(a0)
+# CHECK-NEXT: - - - - 1.00 lw t0, 0(a0)
+# CHECK-NEXT: - - - 1.00 - lwu t0, 0(a0)
+# CHECK-NEXT: - - - - 1.00 ld t0, 0(a0)
+# CHECK-NEXT: - - - 1.00 - sb t0, 0(a0)
+# CHECK-NEXT: - - - - 1.00 sh t0, 0(a0)
+# CHECK-NEXT: - - - 1.00 - sw t0, 0(a0)
+# CHECK-NEXT: - - - - 1.00 sd t0, 0(a0)
+# CHECK-NEXT: - 1.00 - - - mul a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - mulh a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - mulhu a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - mulhsu a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - mulw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - div a0, a1, a2
+# CHECK-NEXT: - 1.00 - - - divu a0, a1, a2
+# CHECK-NEXT: - - 1.00 - - rem a0, a1, a2
+# CHECK-NEXT: - 1.00 - - - remu a0, a1, a2
+# CHECK-NEXT: - - 1.00 - - divw a0, a1, a2
+# CHECK-NEXT: - 1.00 - - - divuw a0, a1, a2
+# CHECK-NEXT: - - 1.00 - - remw a0, a1, a2
+# CHECK-NEXT: - 1.00 - - - remuw a0, a1, a2
+# CHECK-NEXT: - - 1.00 - - csrrw t0, 4095, t1
+# CHECK-NEXT: - 1.00 - - - csrrs s3, fflags, s5
+# CHECK-NEXT: - - 1.00 - - csrrc sp, 0, ra
+# CHECK-NEXT: - 1.00 - - - csrrwi a5, 0, 0
+# CHECK-NEXT: - - 1.00 - - csrrsi t2, 4095, 31
+# CHECK-NEXT: - 1.00 - - - csrrci t1, sscratch, 5
+# CHECK-NEXT: - - 1.00 - - czero.eqz a0, a1, a2
+# CHECK-NEXT: - 1.00 - - - czero.nez a0, a1, a2
+# CHECK-NEXT: - - 1.00 - - czero.eqz a0, a1, a2
+# CHECK-NEXT: - 1.00 - - - czero.nez a0, a1, a2
+# CHECK-NEXT: - - 1.00 - - add.uw a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - slli.uw a0, a0, 1
+# CHECK-NEXT: - - 1.00 - - sh1add.uw a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - sh2add.uw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - sh3add.uw a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - sh1add a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - sh2add a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - sh3add a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - andn a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - orn a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - xnor a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - clz a0, a0
+# CHECK-NEXT: - - 1.00 - - clzw a0, a0
+# CHECK-NEXT: - 1.00 - - - ctz a0, a0
+# CHECK-NEXT: - - 1.00 - - ctzw a0, a0
+# CHECK-NEXT: - 1.00 - - - cpop a0, a0
+# CHECK-NEXT: - - 1.00 - - cpopw a0, a0
+# CHECK-NEXT: - 1.00 - - - min a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - minu a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - max a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - maxu a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - sext.b a0, a0
+# CHECK-NEXT: - - 1.00 - - sext.h a0, a0
+# CHECK-NEXT: - 1.00 - - - zext.h a0, a0
+# CHECK-NEXT: - - 1.00 - - rol a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - rolw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - ror a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - rorw a0, a0, a0
+# CHECK-NEXT: - - 1.00 - - rori a0, a0, 1
+# CHECK-NEXT: - 1.00 - - - roriw a0, a0, 1
+# CHECK-NEXT: - - 1.00 - - orc.b a0, a0
+# CHECK-NEXT: - 1.00 - - - rev8 a0, a0
+# CHECK-NEXT: - - 1.00 - - bclr a0, a1, a2
+# CHECK-NEXT: - 1.00 - - - bclri a0, a1, 1
+# CHECK-NEXT: - - 1.00 - - bext a0, a1, a2
+# CHECK-NEXT: - 1.00 - - - bexti a0, a1, 1
+# CHECK-NEXT: - - 1.00 - - binv a0, a1, a2
+# CHECK-NEXT: - 1.00 - - - binvi a0, a1, 1
+# CHECK-NEXT: - - 1.00 - - bset a0, a1, a2
+# CHECK-NEXT: - 1.00 - - - bseti a0, a1, 1
More information about the llvm-commits
mailing list