[llvm] [RISCV] Implement base scheduling model for andes 45 series processor. (PR #141008)
Jim Lin via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 22:48:47 PDT 2025
https://github.com/tclin914 updated https://github.com/llvm/llvm-project/pull/141008
>From d992985d09f30aba3bbda43f3ecee49a1d6e263b Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Thu, 24 Apr 2025 17:53:47 +0800
Subject: [PATCH 1/2] [RISCV] Implement base scheduling model for andes 45
series processor.
This patch implements the scheduling model for the IMAFD and Zb extensions.
The latency and throughput of all instructions, except loads/stores, were
measured with llvm-exegesis.
The scheduling model for V and other extensions will be added in a follow-up
patch.
---
llvm/lib/Target/RISCV/RISCV.td | 1 +
llvm/lib/Target/RISCV/RISCVProcessors.td | 8 +-
llvm/lib/Target/RISCV/RISCVSchedAndes45.td | 339 +++++++++++++++++++
llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s | 129 +++++++
llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s | 126 +++++++
5 files changed, 599 insertions(+), 4 deletions(-)
create mode 100644 llvm/lib/Target/RISCV/RISCVSchedAndes45.td
create mode 100644 llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s
create mode 100644 llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index e322ae340349c..b24d8637cb27f 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -51,6 +51,7 @@ include "RISCVMacroFusion.td"
//===----------------------------------------------------------------------===//
// RISC-V Scheduling Models
//===----------------------------------------------------------------------===//
+include "RISCVSchedAndes45.td"
include "RISCVSchedGenericOOO.td"
include "RISCVSchedMIPSP8700.td"
include "RISCVSchedRocket.td"
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 735997de94e81..47968c7afcd96 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -649,7 +649,7 @@ def RP2350_HAZARD3 : RISCVProcessorModel<"rp2350-hazard3",
FeatureStdExtZcmp]>;
def ANDES_N45 : RISCVProcessorModel<"andes-n45",
- NoSchedModel,
+ Andes45Model,
[Feature32Bit,
FeatureStdExtI,
FeatureStdExtZicsr,
@@ -662,7 +662,7 @@ def ANDES_N45 : RISCVProcessorModel<"andes-n45",
FeatureStdExtB]>;
def ANDES_NX45 : RISCVProcessorModel<"andes-nx45",
- NoSchedModel,
+ Andes45Model,
[Feature64Bit,
FeatureStdExtI,
FeatureStdExtZicsr,
@@ -675,7 +675,7 @@ def ANDES_NX45 : RISCVProcessorModel<"andes-nx45",
FeatureStdExtB]>;
def ANDES_A45 : RISCVProcessorModel<"andes-a45",
- NoSchedModel,
+ Andes45Model,
[Feature32Bit,
FeatureStdExtI,
FeatureStdExtZicsr,
@@ -688,7 +688,7 @@ def ANDES_A45 : RISCVProcessorModel<"andes-a45",
FeatureStdExtB]>;
def ANDES_AX45 : RISCVProcessorModel<"andes-ax45",
- NoSchedModel,
+ Andes45Model,
[Feature64Bit,
FeatureStdExtI,
FeatureStdExtZicsr,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
new file mode 100644
index 0000000000000..f42b48a1d8b83
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
@@ -0,0 +1,339 @@
+//==- RISCVSchedAndes45.td - Andes45 Scheduling Definitions --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+
+// FIXME: Implement scheduling model for V and other extensions.
+def Andes45Model : SchedMachineModel {
+ let MicroOpBufferSize = 0; // Andes45 is an in-order processor.
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let LoadLatency = 2;
+ let MispredictPenalty = 5;
+ let CompleteModel = 0; // V and other extensions are not modeled yet.
+}
+
+let SchedModel = Andes45Model in {
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+//===----------------------------------------------------------------------===//
+// Andes 45 series CPU
+// - 2 Integer Arithmetic and Logical Units (ALU)
+// - Multiply / Divide Unit (MDU)
+// - Load Store Unit (LSU)
+// - Control and Status Register Unit (CSR)
+// - Floating Point Multiply-Accumulate Unit (FMAC)
+// - Floating Point Divide / SQRT Unit (FDIV)
+// - Floating Point Move Unit (FMV)
+// - Floating Point Misc Unit (FMISC)
+//===----------------------------------------------------------------------===//
+
+let BufferSize = 0 in {
+def Andes45ALU : ProcResource<2>;
+def Andes45MDU : ProcResource<1>;
+def Andes45LSU : ProcResource<1>;
+def Andes45CSR : ProcResource<1>;
+
+def Andes45FMAC : ProcResource<1>;
+def Andes45FDIV : ProcResource<1>;
+def Andes45FMV : ProcResource<1>;
+def Andes45FMISC : ProcResource<1>;
+}
+
+// Integer arithmetic and logic
+def : WriteRes<WriteIALU, [Andes45ALU]>;
+def : WriteRes<WriteIALU32, [Andes45ALU]>;
+def : WriteRes<WriteShiftImm, [Andes45ALU]>;
+def : WriteRes<WriteShiftImm32, [Andes45ALU]>;
+def : WriteRes<WriteShiftReg, [Andes45ALU]>;
+def : WriteRes<WriteShiftReg32, [Andes45ALU]>;
+
+// Branching
+def : WriteRes<WriteJmp, [Andes45ALU]>;
+def : WriteRes<WriteJal, [Andes45ALU]>;
+def : WriteRes<WriteJalr, [Andes45ALU]>;
+
+// Integer multiplication
+let Latency = 3 in {
+def : WriteRes<WriteIMul, [Andes45MDU]>;
+def : WriteRes<WriteIMul32, [Andes45MDU]>;
+}
+
+// Integer division
+let Latency = 39, ReleaseAtCycles = [39] in {
+def : WriteRes<WriteIDiv, [Andes45MDU]>;
+def : WriteRes<WriteIDiv32, [Andes45MDU]>;
+}
+
+// Integer remainder
+let Latency = 39, ReleaseAtCycles = [39] in {
+def : WriteRes<WriteIRem, [Andes45MDU]>;
+def : WriteRes<WriteIRem32, [Andes45MDU]>;
+}
+
+// Memory
+let Latency = 5 in {
+def : WriteRes<WriteLDB, [Andes45LSU]>;
+def : WriteRes<WriteLDH, [Andes45LSU]>;
+def : WriteRes<WriteFLD16, [Andes45LSU]>;
+}
+
+let Latency = 3 in {
+def : WriteRes<WriteLDW, [Andes45LSU]>;
+def : WriteRes<WriteLDD, [Andes45LSU]>;
+def : WriteRes<WriteFLD32, [Andes45LSU]>;
+def : WriteRes<WriteFLD64, [Andes45LSU]>;
+}
+
+let Latency = 1 in {
+def : WriteRes<WriteSTB, [Andes45LSU]>;
+def : WriteRes<WriteSTH, [Andes45LSU]>;
+def : WriteRes<WriteSTW, [Andes45LSU]>;
+def : WriteRes<WriteSTD, [Andes45LSU]>;
+def : WriteRes<WriteFST16, [Andes45LSU]>;
+def : WriteRes<WriteFST32, [Andes45LSU]>;
+def : WriteRes<WriteFST64, [Andes45LSU]>;
+}
+
+// Atomic Memory
+let Latency = 9 in {
+def : WriteRes<WriteAtomicW, [Andes45LSU]>;
+def : WriteRes<WriteAtomicD, [Andes45LSU]>;
+def : WriteRes<WriteAtomicLDW, [Andes45LSU]>;
+def : WriteRes<WriteAtomicLDD, [Andes45LSU]>;
+}
+
+let Latency = 3 in {
+def : WriteRes<WriteAtomicSTW, [Andes45LSU]>;
+def : WriteRes<WriteAtomicSTD, [Andes45LSU]>;
+}
+
+// FMAC
+let Latency = 4 in {
+def : WriteRes<WriteFAdd16, [Andes45FMAC]>;
+def : WriteRes<WriteFAdd32, [Andes45FMAC]>;
+def : WriteRes<WriteFAdd64, [Andes45FMAC]>;
+def : WriteRes<WriteFMul16, [Andes45FMAC]>;
+def : WriteRes<WriteFMul32, [Andes45FMAC]>;
+def : WriteRes<WriteFMul64, [Andes45FMAC]>;
+def : WriteRes<WriteFMA16, [Andes45FMAC]>;
+def : WriteRes<WriteFMA32, [Andes45FMAC]>;
+def : WriteRes<WriteFMA64, [Andes45FMAC]>;
+}
+
+// FDIV
+let Latency = 12, ReleaseAtCycles = [12] in
+def : WriteRes<WriteFDiv16, [Andes45FDIV]>;
+let Latency = 11, ReleaseAtCycles = [11] in
+def : WriteRes<WriteFSqrt16, [Andes45FDIV]>;
+
+let Latency = 19, ReleaseAtCycles = [19] in
+def : WriteRes<WriteFDiv32, [Andes45FDIV]>;
+let Latency = 18, ReleaseAtCycles = [18] in
+def : WriteRes<WriteFSqrt32, [Andes45FDIV]>;
+
+let Latency = 33, ReleaseAtCycles = [33] in
+def : WriteRes<WriteFDiv64, [Andes45FDIV]>;
+let Latency = 32, ReleaseAtCycles = [32] in
+def : WriteRes<WriteFSqrt64, [Andes45FDIV]>;
+
+// FMV
+def : WriteRes<WriteFSGNJ16, [Andes45FMV]>;
+def : WriteRes<WriteFSGNJ32, [Andes45FMV]>;
+def : WriteRes<WriteFSGNJ64, [Andes45FMV]>;
+def : WriteRes<WriteFMovF16ToI16, [Andes45FMV]>;
+def : WriteRes<WriteFMovI16ToF16, [Andes45FMV]>;
+def : WriteRes<WriteFMovF32ToI32, [Andes45FMV]>;
+def : WriteRes<WriteFMovI32ToF32, [Andes45FMV]>;
+def : WriteRes<WriteFMovF64ToI64, [Andes45FMV]>;
+def : WriteRes<WriteFMovI64ToF64, [Andes45FMV]>;
+
+// FMISC
+let Latency = 2 in {
+def : WriteRes<WriteFMinMax16, [Andes45FMISC]>;
+def : WriteRes<WriteFMinMax32, [Andes45FMISC]>;
+def : WriteRes<WriteFMinMax64, [Andes45FMISC]>;
+def : WriteRes<WriteFClass16, [Andes45FMISC]>;
+def : WriteRes<WriteFClass32, [Andes45FMISC]>;
+def : WriteRes<WriteFClass64, [Andes45FMISC]>;
+def : WriteRes<WriteFCmp16, [Andes45FMISC]>;
+def : WriteRes<WriteFCmp32, [Andes45FMISC]>;
+def : WriteRes<WriteFCmp64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF16ToI32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF16ToI64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF32ToI32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF32ToI64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF64ToI32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF64ToI64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI32ToF16, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI32ToF32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI32ToF64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI64ToF16, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI64ToF32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtI64ToF64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF16ToF32, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF16ToF64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF32ToF16, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF32ToF64, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF64ToF16, [Andes45FMISC]>;
+def : WriteRes<WriteFCvtF64ToF32, [Andes45FMISC]>;
+}
+
+// Bitmanip
+// Zba extension
+def : WriteRes<WriteSHXADD, [Andes45ALU]>;
+def : WriteRes<WriteSHXADD32, [Andes45ALU]>;
+
+// Zbb extension
+def : WriteRes<WriteRotateImm, [Andes45ALU]>;
+def : WriteRes<WriteRotateImm32, [Andes45ALU]>;
+def : WriteRes<WriteRotateReg, [Andes45ALU]>;
+def : WriteRes<WriteRotateReg32, [Andes45ALU]>;
+def : WriteRes<WriteREV8, [Andes45ALU]>;
+def : WriteRes<WriteORCB, [Andes45ALU]>;
+def : WriteRes<WriteIMinMax, [Andes45ALU]>;
+
+let Latency = 3 in {
+def : WriteRes<WriteCLZ, [Andes45ALU]>;
+def : WriteRes<WriteCLZ32, [Andes45ALU]>;
+def : WriteRes<WriteCTZ, [Andes45ALU]>;
+def : WriteRes<WriteCTZ32, [Andes45ALU]>;
+def : WriteRes<WriteCPOP, [Andes45ALU]>;
+def : WriteRes<WriteCPOP32, [Andes45ALU]>;
+}
+
+// Zbc extension
+let Latency = 3 in
+def : WriteRes<WriteCLMUL, [Andes45ALU]>;
+
+// Zbs extension
+def : WriteRes<WriteSingleBit, [Andes45ALU]>;
+def : WriteRes<WriteSingleBitImm, [Andes45ALU]>;
+def : WriteRes<WriteBEXT, [Andes45ALU]>;
+def : WriteRes<WriteBEXTI, [Andes45ALU]>;
+
+// Others
+def : WriteRes<WriteCSR, [Andes45CSR]>;
+def : WriteRes<WriteNop, []>;
+
+//===----------------------------------------------------------------------===//
+
+// Bypass and advance
+def : ReadAdvance<ReadIALU, 0>;
+def : ReadAdvance<ReadIALU32, 0>;
+def : ReadAdvance<ReadShiftImm, 0>;
+def : ReadAdvance<ReadShiftImm32, 0>;
+def : ReadAdvance<ReadShiftReg, 0>;
+def : ReadAdvance<ReadShiftReg32, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIRem, 0>;
+def : ReadAdvance<ReadIRem32, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFStoreData, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
+def : ReadAdvance<ReadFAdd16, 0>;
+def : ReadAdvance<ReadFAdd32, 0>;
+def : ReadAdvance<ReadFAdd64, 0>;
+def : ReadAdvance<ReadFMul16, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMA16, 0>;
+def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA16Addend, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
+def : ReadAdvance<ReadFDiv16, 0>;
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt16, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFSGNJ16, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMovF16ToI16, 0>;
+def : ReadAdvance<ReadFMovI16ToF16, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFMinMax16, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
+def : ReadAdvance<ReadFClass16, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+def : ReadAdvance<ReadFCmp16, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFCvtF16ToI32, 0>;
+def : ReadAdvance<ReadFCvtF16ToI64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF16, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF16, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF16ToF32, 0>;
+def : ReadAdvance<ReadFCvtF16ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF16, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF16, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadSHXADD, 0>;
+def : ReadAdvance<ReadSHXADD32, 0>;
+def : ReadAdvance<ReadRotateImm, 1>;
+def : ReadAdvance<ReadRotateImm32, 1>;
+def : ReadAdvance<ReadRotateReg, 1>;
+def : ReadAdvance<ReadRotateReg32, 1>;
+def : ReadAdvance<ReadCLZ, 0>;
+def : ReadAdvance<ReadCLZ32, 0>;
+def : ReadAdvance<ReadCTZ, 0>;
+def : ReadAdvance<ReadCTZ32, 0>;
+def : ReadAdvance<ReadCPOP, 0>;
+def : ReadAdvance<ReadCPOP32, 0>;
+def : ReadAdvance<ReadREV8, 0>;
+def : ReadAdvance<ReadORCB, 0>;
+def : ReadAdvance<ReadIMinMax, 0>;
+def : ReadAdvance<ReadCLMUL, 0>;
+def : ReadAdvance<ReadSingleBit, 0>;
+def : ReadAdvance<ReadSingleBitImm, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedQ;
+defm : UnsupportedSchedSFB;
+defm : UnsupportedSchedV;
+defm : UnsupportedSchedXsfvcp;
+defm : UnsupportedSchedZabha;
+defm : UnsupportedSchedZbkb;
+defm : UnsupportedSchedZbkx;
+defm : UnsupportedSchedZfa;
+defm : UnsupportedSchedZvk;
+}
diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s
new file mode 100644
index 0000000000000..d1ab4b3b6a7e0
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s
@@ -0,0 +1,129 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-nx45 -timeline -iterations=1 < %s | FileCheck %s
+
+// Can be dispatched in the same cycle if different functional units are used.
+fadd.s ft0, fa0, fa1
+fdiv.s ft0, fa0, fa1
+
+// FMAC
+fadd.s ft0, fa0, fa1
+fmul.s ft0, fa0, fa1
+fmadd.s ft0, fa0, fa1, fa2
+
+// FDIV
+fdiv.s ft0, fa0, fa1
+fsqrt.s ft0, fa0
+
+// FMV
+fsgnj.s ft0, fa0, fa1
+fmv.x.w a0, fa0
+
+// FMISC
+fmin.s ft0, fa0, fa1
+fclass.s a0, fa0
+feq.s a0, fa0, fa1
+fcvt.s.w ft0, a0
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 13
+# CHECK-NEXT: Total Cycles: 63
+# CHECK-NEXT: Total uOps: 13
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.21
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 56.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 4 1.00 fadd.s ft0, fa0, fa1
+# CHECK-NEXT: 1 19 19.00 fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: 1 4 1.00 fadd.s ft0, fa0, fa1
+# CHECK-NEXT: 1 4 1.00 fmul.s ft0, fa0, fa1
+# CHECK-NEXT: 1 4 1.00 fmadd.s ft0, fa0, fa1, fa2
+# CHECK-NEXT: 1 19 19.00 fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: 1 18 18.00 fsqrt.s ft0, fa0
+# CHECK-NEXT: 1 1 1.00 fsgnj.s ft0, fa0, fa1
+# CHECK-NEXT: 1 1 1.00 fmv.x.w a0, fa0
+# CHECK-NEXT: 1 2 1.00 fmin.s ft0, fa0, fa1
+# CHECK-NEXT: 1 2 1.00 fclass.s a0, fa0
+# CHECK-NEXT: 1 2 1.00 feq.s a0, fa0, fa1
+# CHECK-NEXT: 1 2 1.00 fcvt.s.w ft0, a0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - Andes45ALU
+# CHECK-NEXT: [0.1] - Andes45ALU
+# CHECK-NEXT: [1] - Andes45CSR
+# CHECK-NEXT: [2] - Andes45FDIV
+# CHECK-NEXT: [3] - Andes45FMAC
+# CHECK-NEXT: [4] - Andes45FMISC
+# CHECK-NEXT: [5] - Andes45FMV
+# CHECK-NEXT: [6] - Andes45LSU
+# CHECK-NEXT: [7] - Andes45MDU
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7]
+# CHECK-NEXT: - - - 56.00 4.00 4.00 2.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] Instructions:
+# CHECK-NEXT: - - - - 1.00 - - - - fadd.s ft0, fa0, fa1
+# CHECK-NEXT: - - - 19.00 - - - - - fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - 1.00 - - - - fadd.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - 1.00 - - - - fmul.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - 1.00 - - - - fmadd.s ft0, fa0, fa1, fa2
+# CHECK-NEXT: - - - 19.00 - - - - - fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: - - - 18.00 - - - - - fsqrt.s ft0, fa0
+# CHECK-NEXT: - - - - - - 1.00 - - fsgnj.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - - - 1.00 - - fmv.x.w a0, fa0
+# CHECK-NEXT: - - - - - 1.00 - - - fmin.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - - 1.00 - - - fclass.s a0, fa0
+# CHECK-NEXT: - - - - - 1.00 - - - feq.s a0, fa0, fa1
+# CHECK-NEXT: - - - - - 1.00 - - - fcvt.s.w ft0, a0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeE. . . . . . . . . . . . . fadd.s ft0, fa0, fa1
+# CHECK-NEXT: [0,1] DeeeeeeeeeeeeeeeeeeE. . . . . . . . . . fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: [0,2] . . . DeeeE. . . . . . . . . . fadd.s ft0, fa0, fa1
+# CHECK-NEXT: [0,3] . . . .DeeeE . . . . . . . . . fmul.s ft0, fa0, fa1
+# CHECK-NEXT: [0,4] . . . . DeeeE . . . . . . . . . fmadd.s ft0, fa0, fa1, fa2
+# CHECK-NEXT: [0,5] . . . . DeeeeeeeeeeeeeeeeeeE . . . . . . fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: [0,6] . . . . . . . . DeeeeeeeeeeeeeeeeeE . . fsqrt.s ft0, fa0
+# CHECK-NEXT: [0,7] . . . . . . . . . . . DE . . fsgnj.s ft0, fa0, fa1
+# CHECK-NEXT: [0,8] . . . . . . . . . . . .DE . . fmv.x.w a0, fa0
+# CHECK-NEXT: [0,9] . . . . . . . . . . . .DeE . . fmin.s ft0, fa0, fa1
+# CHECK-NEXT: [0,10] . . . . . . . . . . . . DeE. . fclass.s a0, fa0
+# CHECK-NEXT: [0,11] . . . . . . . . . . . . DeE . feq.s a0, fa0, fa1
+# CHECK-NEXT: [0,12] . . . . . . . . . . . . DeE fcvt.s.w ft0, a0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 fadd.s ft0, fa0, fa1
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 fadd.s ft0, fa0, fa1
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 fmul.s ft0, fa0, fa1
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 fmadd.s ft0, fa0, fa1, fa2
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 fsqrt.s ft0, fa0
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 fsgnj.s ft0, fa0, fa1
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 fmv.x.w a0, fa0
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 fmin.s ft0, fa0, fa1
+# CHECK-NEXT: 10. 1 0.0 0.0 0.0 fclass.s a0, fa0
+# CHECK-NEXT: 11. 1 0.0 0.0 0.0 feq.s a0, fa0, fa1
+# CHECK-NEXT: 12. 1 0.0 0.0 0.0 fcvt.s.w ft0, a0
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s
new file mode 100644
index 0000000000000..fb5d4e991fbba
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s
@@ -0,0 +1,126 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-nx45 -timeline -iterations=1 < %s | FileCheck %s
+
+# Two ALUs without dependency can be dispatched in the same cycle.
+add a0, a0, a0
+sub a1, a1, a1
+
+# Otherwise, they can't be dispatched in the same cycle.
+addw a0, a0, a0
+subw a0, a0, a0
+
+// MDU
+mul a0, a0, a0
+div a0, a0, a0
+
+// Memory
+lb a0, 4(a1)
+lw a0, 4(a1)
+sb a0, 4(a1)
+sw a0, 4(a1)
+
+// CSR
+csrrw a0, mstatus, zero
+
+// Bitmanip
+sh1add a0, a0, a0
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 12
+# CHECK-NEXT: Total Cycles: 49
+# CHECK-NEXT: Total uOps: 12
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.24
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 40.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 add a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sub a1, a1, a1
+# CHECK-NEXT: 1 1 0.50 addw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 subw a0, a0, a0
+# CHECK-NEXT: 1 3 1.00 mul a0, a0, a0
+# CHECK-NEXT: 1 39 39.00 div a0, a0, a0
+# CHECK-NEXT: 1 5 1.00 * lb a0, 4(a1)
+# CHECK-NEXT: 1 3 1.00 * lw a0, 4(a1)
+# CHECK-NEXT: 1 1 1.00 * sb a0, 4(a1)
+# CHECK-NEXT: 1 1 1.00 * sw a0, 4(a1)
+# CHECK-NEXT: 1 1 1.00 U csrrw a0, mstatus, zero
+# CHECK-NEXT: 1 1 0.50 sh1add a0, a0, a0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - Andes45ALU
+# CHECK-NEXT: [0.1] - Andes45ALU
+# CHECK-NEXT: [1] - Andes45CSR
+# CHECK-NEXT: [2] - Andes45FDIV
+# CHECK-NEXT: [3] - Andes45FMAC
+# CHECK-NEXT: [4] - Andes45FMISC
+# CHECK-NEXT: [5] - Andes45FMV
+# CHECK-NEXT: [6] - Andes45LSU
+# CHECK-NEXT: [7] - Andes45MDU
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7]
+# CHECK-NEXT: 2.00 3.00 1.00 - - - - 4.00 40.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - add a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - sub a1, a1, a1
+# CHECK-NEXT: - 1.00 - - - - - - - addw a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - subw a0, a0, a0
+# CHECK-NEXT: - - - - - - - - 1.00 mul a0, a0, a0
+# CHECK-NEXT: - - - - - - - - 39.00 div a0, a0, a0
+# CHECK-NEXT: - - - - - - - 1.00 - lb a0, 4(a1)
+# CHECK-NEXT: - - - - - - - 1.00 - lw a0, 4(a1)
+# CHECK-NEXT: - - - - - - - 1.00 - sb a0, 4(a1)
+# CHECK-NEXT: - - - - - - - 1.00 - sw a0, 4(a1)
+# CHECK-NEXT: - - 1.00 - - - - - - csrrw a0, mstatus, zero
+# CHECK-NEXT: - 1.00 - - - - - - - sh1add a0, a0, a0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012345678
+
+# CHECK: [0,0] DE . . . . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,1] DE . . . . . . . . . . sub a1, a1, a1
+# CHECK-NEXT: [0,2] .DE . . . . . . . . . . addw a0, a0, a0
+# CHECK-NEXT: [0,3] . DE . . . . . . . . . . subw a0, a0, a0
+# CHECK-NEXT: [0,4] . DeeE . . . . . . . . . mul a0, a0, a0
+# CHECK-NEXT: [0,5] . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . div a0, a0, a0
+# CHECK-NEXT: [0,6] . . . . . . . . DeeeeE . lb a0, 4(a1)
+# CHECK-NEXT: [0,7] . . . . . . . . . DeeE . lw a0, 4(a1)
+# CHECK-NEXT: [0,8] . . . . . . . . . DE . sb a0, 4(a1)
+# CHECK-NEXT: [0,9] . . . . . . . . . .DE. sw a0, 4(a1)
+# CHECK-NEXT: [0,10] . . . . . . . . . .DE. csrrw a0, mstatus, zero
+# CHECK-NEXT: [0,11] . . . . . . . . . . DE sh1add a0, a0, a0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 add a0, a0, a0
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 sub a1, a1, a1
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 addw a0, a0, a0
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 subw a0, a0, a0
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 mul a0, a0, a0
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 div a0, a0, a0
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 lb a0, 4(a1)
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 lw a0, 4(a1)
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 sb a0, 4(a1)
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 sw a0, 4(a1)
+# CHECK-NEXT: 10. 1 0.0 0.0 0.0 csrrw a0, mstatus, zero
+# CHECK-NEXT: 11. 1 0.0 0.0 0.0 sh1add a0, a0, a0
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
>From 1028f25e2d5d1aa446c1925c727cf72b33974303 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Wed, 4 Jun 2025 13:35:49 +0800
Subject: [PATCH 2/2] Add more tests into gpr.s
---
llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s | 188 ++++++++++++++++---
1 file changed, 157 insertions(+), 31 deletions(-)
diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s
index fb5d4e991fbba..f6dc6eef3f0ff 100644
--- a/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s
+++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
-# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-nx45 -timeline -iterations=1 < %s | FileCheck %s
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-nx45 -mattr=+zbc -timeline -iterations=1 < %s | FileCheck %s
# Two ALUs without dependency can be dispatched in the same cycle.
add a0, a0, a0
@@ -9,31 +9,67 @@ sub a1, a1, a1
addw a0, a0, a0
subw a0, a0, a0
+// ALU and Shift
+slli a0, a0, 4
+slliw a0, a0, 4
+srl a0, a0, a0
+srlw a0, a0, a0
+
// MDU
mul a0, a0, a0
+mulw a0, a0, a0
div a0, a0, a0
+divw a0, a0, a0
// Memory
lb a0, 4(a1)
+lh a0, 4(a1)
lw a0, 4(a1)
+ld a0, 4(a1)
+
+flw fa0, 4(a1)
+fld fa0, 4(a1)
+
sb a0, 4(a1)
+sh a0, 4(a1)
sw a0, 4(a1)
+sd a0, 4(a1)
+
+// Atomic Memory
+amoswap.w a0, a1, (a0)
+amoswap.d a0, a1, (a0)
+lr.w a0, (a0)
+lr.d a0, (a0)
+sc.w a0, a1, (a0)
+sc.d a0, a1, (a0)
// CSR
csrrw a0, mstatus, zero
// Bitmanip
sh1add a0, a0, a0
+sh1add.uw a0, a0, a0
+rori a0, a0, 4
+roriw a0, a0, 4
+rol a0, a0, a0
+rolw a0, a0, a0
+clz a0, a0
+clzw a0, a0
+clmul a0, a0, a0
+bclri a0, a0, 4
+bclr a0, a0, a0
+bexti a0, a0, 4
+bext a0, a0, a0
# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 12
-# CHECK-NEXT: Total Cycles: 49
-# CHECK-NEXT: Total uOps: 12
+# CHECK-NEXT: Instructions: 42
+# CHECK-NEXT: Total Cycles: 158
+# CHECK-NEXT: Total uOps: 42
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.24
-# CHECK-NEXT: IPC: 0.24
-# CHECK-NEXT: Block RThroughput: 40.0
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 80.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -48,14 +84,44 @@ sh1add a0, a0, a0
# CHECK-NEXT: 1 1 0.50 sub a1, a1, a1
# CHECK-NEXT: 1 1 0.50 addw a0, a0, a0
# CHECK-NEXT: 1 1 0.50 subw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 slli a0, a0, 4
+# CHECK-NEXT: 1 1 0.50 slliw a0, a0, 4
+# CHECK-NEXT: 1 1 0.50 srl a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 srlw a0, a0, a0
# CHECK-NEXT: 1 3 1.00 mul a0, a0, a0
+# CHECK-NEXT: 1 3 1.00 mulw a0, a0, a0
# CHECK-NEXT: 1 39 39.00 div a0, a0, a0
+# CHECK-NEXT: 1 39 39.00 divw a0, a0, a0
# CHECK-NEXT: 1 5 1.00 * lb a0, 4(a1)
+# CHECK-NEXT: 1 5 1.00 * lh a0, 4(a1)
# CHECK-NEXT: 1 3 1.00 * lw a0, 4(a1)
+# CHECK-NEXT: 1 3 1.00 * ld a0, 4(a1)
+# CHECK-NEXT: 1 3 1.00 * flw fa0, 4(a1)
+# CHECK-NEXT: 1 3 1.00 * fld fa0, 4(a1)
# CHECK-NEXT: 1 1 1.00 * sb a0, 4(a1)
+# CHECK-NEXT: 1 1 1.00 * sh a0, 4(a1)
# CHECK-NEXT: 1 1 1.00 * sw a0, 4(a1)
+# CHECK-NEXT: 1 1 1.00 * sd a0, 4(a1)
+# CHECK-NEXT: 1 9 1.00 * * amoswap.w a0, a1, (a0)
+# CHECK-NEXT: 1 9 1.00 * * amoswap.d a0, a1, (a0)
+# CHECK-NEXT: 1 9 1.00 * lr.w a0, (a0)
+# CHECK-NEXT: 1 9 1.00 * lr.d a0, (a0)
+# CHECK-NEXT: 1 3 1.00 * sc.w a0, a1, (a0)
+# CHECK-NEXT: 1 3 1.00 * sc.d a0, a1, (a0)
# CHECK-NEXT: 1 1 1.00 U csrrw a0, mstatus, zero
# CHECK-NEXT: 1 1 0.50 sh1add a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 sh1add.uw a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 rori a0, a0, 4
+# CHECK-NEXT: 1 1 0.50 roriw a0, a0, 4
+# CHECK-NEXT: 1 1 0.50 rol a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 rolw a0, a0, a0
+# CHECK-NEXT: 1 3 0.50 clz a0, a0
+# CHECK-NEXT: 1 3 0.50 clzw a0, a0
+# CHECK-NEXT: 1 3 0.50 clmul a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 bclri a0, a0, 4
+# CHECK-NEXT: 1 1 0.50 bclr a0, a0, a0
+# CHECK-NEXT: 1 1 0.50 bexti a0, a0, 4
+# CHECK-NEXT: 1 1 0.50 bext a0, a0, a0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - Andes45ALU
@@ -70,7 +136,7 @@ sh1add a0, a0, a0
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: 2.00 3.00 1.00 - - - - 4.00 40.00
+# CHECK-NEXT: 10.00 11.00 1.00 - - - - 16.00 80.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -78,31 +144,61 @@ sh1add a0, a0, a0
# CHECK-NEXT: 1.00 - - - - - - - - sub a1, a1, a1
# CHECK-NEXT: - 1.00 - - - - - - - addw a0, a0, a0
# CHECK-NEXT: 1.00 - - - - - - - - subw a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - - - - - slli a0, a0, 4
+# CHECK-NEXT: 1.00 - - - - - - - - slliw a0, a0, 4
+# CHECK-NEXT: - 1.00 - - - - - - - srl a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - srlw a0, a0, a0
# CHECK-NEXT: - - - - - - - - 1.00 mul a0, a0, a0
+# CHECK-NEXT: - - - - - - - - 1.00 mulw a0, a0, a0
# CHECK-NEXT: - - - - - - - - 39.00 div a0, a0, a0
+# CHECK-NEXT: - - - - - - - - 39.00 divw a0, a0, a0
# CHECK-NEXT: - - - - - - - 1.00 - lb a0, 4(a1)
+# CHECK-NEXT: - - - - - - - 1.00 - lh a0, 4(a1)
# CHECK-NEXT: - - - - - - - 1.00 - lw a0, 4(a1)
+# CHECK-NEXT: - - - - - - - 1.00 - ld a0, 4(a1)
+# CHECK-NEXT: - - - - - - - 1.00 - flw fa0, 4(a1)
+# CHECK-NEXT: - - - - - - - 1.00 - fld fa0, 4(a1)
# CHECK-NEXT: - - - - - - - 1.00 - sb a0, 4(a1)
+# CHECK-NEXT: - - - - - - - 1.00 - sh a0, 4(a1)
# CHECK-NEXT: - - - - - - - 1.00 - sw a0, 4(a1)
+# CHECK-NEXT: - - - - - - - 1.00 - sd a0, 4(a1)
+# CHECK-NEXT: - - - - - - - 1.00 - amoswap.w a0, a1, (a0)
+# CHECK-NEXT: - - - - - - - 1.00 - amoswap.d a0, a1, (a0)
+# CHECK-NEXT: - - - - - - - 1.00 - lr.w a0, (a0)
+# CHECK-NEXT: - - - - - - - 1.00 - lr.d a0, (a0)
+# CHECK-NEXT: - - - - - - - 1.00 - sc.w a0, a1, (a0)
+# CHECK-NEXT: - - - - - - - 1.00 - sc.d a0, a1, (a0)
# CHECK-NEXT: - - 1.00 - - - - - - csrrw a0, mstatus, zero
# CHECK-NEXT: - 1.00 - - - - - - - sh1add a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - sh1add.uw a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - - - - - rori a0, a0, 4
+# CHECK-NEXT: 1.00 - - - - - - - - roriw a0, a0, 4
+# CHECK-NEXT: - 1.00 - - - - - - - rol a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - rolw a0, a0, a0
+# CHECK-NEXT: - 1.00 - - - - - - - clz a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - clzw a0, a0
+# CHECK-NEXT: - 1.00 - - - - - - - clmul a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - bclri a0, a0, 4
+# CHECK-NEXT: - 1.00 - - - - - - - bclr a0, a0, a0
+# CHECK-NEXT: 1.00 - - - - - - - - bexti a0, a0, 4
+# CHECK-NEXT: - 1.00 - - - - - - - bext a0, a0, a0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 012345678
-
-# CHECK: [0,0] DE . . . . . . . . . . add a0, a0, a0
-# CHECK-NEXT: [0,1] DE . . . . . . . . . . sub a1, a1, a1
-# CHECK-NEXT: [0,2] .DE . . . . . . . . . . addw a0, a0, a0
-# CHECK-NEXT: [0,3] . DE . . . . . . . . . . subw a0, a0, a0
-# CHECK-NEXT: [0,4] . DeeE . . . . . . . . . mul a0, a0, a0
-# CHECK-NEXT: [0,5] . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . div a0, a0, a0
-# CHECK-NEXT: [0,6] . . . . . . . . DeeeeE . lb a0, 4(a1)
-# CHECK-NEXT: [0,7] . . . . . . . . . DeeE . lw a0, 4(a1)
-# CHECK-NEXT: [0,8] . . . . . . . . . DE . sb a0, 4(a1)
-# CHECK-NEXT: [0,9] . . . . . . . . . .DE. sw a0, 4(a1)
-# CHECK-NEXT: [0,10] . . . . . . . . . .DE. csrrw a0, mstatus, zero
-# CHECK-NEXT: [0,11] . . . . . . . . . . DE sh1add a0, a0, a0
+# CHECK-NEXT: 0123456789 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DE . . . . . . . . . . . add a0, a0, a0
+# CHECK-NEXT: [0,1] DE . . . . . . . . . . . sub a1, a1, a1
+# CHECK-NEXT: [0,2] .DE . . . . . . . . . . . addw a0, a0, a0
+# CHECK-NEXT: [0,3] . DE . . . . . . . . . . . subw a0, a0, a0
+# CHECK-NEXT: [0,4] . DE. . . . . . . . . . . slli a0, a0, 4
+# CHECK-NEXT: [0,5] . DE . . . . . . . . . . slliw a0, a0, 4
+# CHECK-NEXT: [0,6] . DE . . . . . . . . . . srl a0, a0, a0
+# CHECK-NEXT: [0,7] . .DE . . . . . . . . . . srlw a0, a0, a0
+# CHECK-NEXT: [0,8] . . DeeE . . . . . . . . . mul a0, a0, a0
+# CHECK-NEXT: [0,9] . . DeeE . . . . . . . . . mulw a0, a0, a0
+# CHECK-NEXT: [0,10] . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE div a0, a0, a0
+# CHECK-NEXT: Truncated display due to cycle limit
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -115,12 +211,42 @@ sh1add a0, a0, a0
# CHECK-NEXT: 1. 1 0.0 0.0 0.0 sub a1, a1, a1
# CHECK-NEXT: 2. 1 0.0 0.0 0.0 addw a0, a0, a0
# CHECK-NEXT: 3. 1 0.0 0.0 0.0 subw a0, a0, a0
-# CHECK-NEXT: 4. 1 0.0 0.0 0.0 mul a0, a0, a0
-# CHECK-NEXT: 5. 1 0.0 0.0 0.0 div a0, a0, a0
-# CHECK-NEXT: 6. 1 0.0 0.0 0.0 lb a0, 4(a1)
-# CHECK-NEXT: 7. 1 0.0 0.0 0.0 lw a0, 4(a1)
-# CHECK-NEXT: 8. 1 0.0 0.0 0.0 sb a0, 4(a1)
-# CHECK-NEXT: 9. 1 0.0 0.0 0.0 sw a0, 4(a1)
-# CHECK-NEXT: 10. 1 0.0 0.0 0.0 csrrw a0, mstatus, zero
-# CHECK-NEXT: 11. 1 0.0 0.0 0.0 sh1add a0, a0, a0
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 slli a0, a0, 4
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 slliw a0, a0, 4
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 srl a0, a0, a0
+# CHECK-NEXT: 7. 1 0.0 0.0 0.0 srlw a0, a0, a0
+# CHECK-NEXT: 8. 1 0.0 0.0 0.0 mul a0, a0, a0
+# CHECK-NEXT: 9. 1 0.0 0.0 0.0 mulw a0, a0, a0
+# CHECK-NEXT: 10. 1 0.0 0.0 0.0 div a0, a0, a0
+# CHECK-NEXT: 11. 1 0.0 0.0 0.0 divw a0, a0, a0
+# CHECK-NEXT: 12. 1 0.0 0.0 0.0 lb a0, 4(a1)
+# CHECK-NEXT: 13. 1 0.0 0.0 0.0 lh a0, 4(a1)
+# CHECK-NEXT: 14. 1 0.0 0.0 0.0 lw a0, 4(a1)
+# CHECK-NEXT: 15. 1 0.0 0.0 0.0 ld a0, 4(a1)
+# CHECK-NEXT: 16. 1 0.0 0.0 0.0 flw fa0, 4(a1)
+# CHECK-NEXT: 17. 1 0.0 0.0 0.0 fld fa0, 4(a1)
+# CHECK-NEXT: 18. 1 0.0 0.0 0.0 sb a0, 4(a1)
+# CHECK-NEXT: 19. 1 0.0 0.0 0.0 sh a0, 4(a1)
+# CHECK-NEXT: 20. 1 0.0 0.0 0.0 sw a0, 4(a1)
+# CHECK-NEXT: 21. 1 0.0 0.0 0.0 sd a0, 4(a1)
+# CHECK-NEXT: 22. 1 0.0 0.0 0.0 amoswap.w a0, a1, (a0)
+# CHECK-NEXT: 23. 1 0.0 0.0 0.0 amoswap.d a0, a1, (a0)
+# CHECK-NEXT: 24. 1 0.0 0.0 0.0 lr.w a0, (a0)
+# CHECK-NEXT: 25. 1 0.0 0.0 0.0 lr.d a0, (a0)
+# CHECK-NEXT: 26. 1 0.0 0.0 0.0 sc.w a0, a1, (a0)
+# CHECK-NEXT: 27. 1 0.0 0.0 0.0 sc.d a0, a1, (a0)
+# CHECK-NEXT: 28. 1 0.0 0.0 0.0 csrrw a0, mstatus, zero
+# CHECK-NEXT: 29. 1 0.0 0.0 0.0 sh1add a0, a0, a0
+# CHECK-NEXT: 30. 1 0.0 0.0 0.0 sh1add.uw a0, a0, a0
+# CHECK-NEXT: 31. 1 0.0 0.0 0.0 rori a0, a0, 4
+# CHECK-NEXT: 32. 1 0.0 0.0 0.0 roriw a0, a0, 4
+# CHECK-NEXT: 33. 1 0.0 0.0 0.0 rol a0, a0, a0
+# CHECK-NEXT: 34. 1 0.0 0.0 0.0 rolw a0, a0, a0
+# CHECK-NEXT: 35. 1 0.0 0.0 0.0 clz a0, a0
+# CHECK-NEXT: 36. 1 0.0 0.0 0.0 clzw a0, a0
+# CHECK-NEXT: 37. 1 0.0 0.0 0.0 clmul a0, a0, a0
+# CHECK-NEXT: 38. 1 0.0 0.0 0.0 bclri a0, a0, 4
+# CHECK-NEXT: 39. 1 0.0 0.0 0.0 bclr a0, a0, a0
+# CHECK-NEXT: 40. 1 0.0 0.0 0.0 bexti a0, a0, 4
+# CHECK-NEXT: 41. 1 0.0 0.0 0.0 bext a0, a0, a0
# CHECK-NEXT: 1 0.0 0.0 0.0 <total>
More information about the llvm-commits
mailing list