[llvm] [RISCV][llvm-mca] Use correct LMUL and SEW for strided loads and stores (PR #76869)
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 3 14:26:06 PST 2024
https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/76869
>From e7b90a7635733bfd0b3d575f96270a2c7ea3ca92 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 3 Jan 2024 14:08:46 -0800
Subject: [PATCH] [RISCV][llvm-mca] Use correct LMUL and SEW for strided loads
and stores
The pseudos for strided loads and stores use the SEW coming from the
name. For example, vlse8 has SEW=8 and vlse16 has SEW=16.
When llvm-mca tries to look up (VLSE8_V, SEW=S, LMUL=L) in the inverse pseudo
table, a result will only be found when S=8, where S was set by the previous
vsetvli instruction. Instead, for a match to be found, we must look up
(VLSE8_V, SEW=8, LMUL=L'), where L' is the EMUL, calculated by scaling the LMUL
from the previous vsetvli by the ratio of the new SEW (8) to that vsetvli's SEW.
---
.../Target/RISCV/MCA/RISCVCustomBehaviour.cpp | 33 +-
.../RISCV/SiFive7/strided-load-store.s | 427 ++++++++++++++++++
2 files changed, 451 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-store.s
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index aba2511959af03..8d97c5ffd20a05 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -186,30 +186,37 @@ RISCVInstrumentManager::createInstruments(const MCInst &Inst) {
}
static std::pair<uint8_t, uint8_t>
-getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL,
- uint8_t SEW) {
+getEEWAndEMUL(unsigned Opcode, RISCVII::VLMUL LMUL, uint8_t SEW) {
uint8_t EEW;
switch (Opcode) {
case RISCV::VLM_V:
case RISCV::VSM_V:
case RISCV::VLE8_V:
case RISCV::VSE8_V:
+ case RISCV::VLSE8_V:
+ case RISCV::VSSE8_V:
EEW = 8;
break;
case RISCV::VLE16_V:
case RISCV::VSE16_V:
+ case RISCV::VLSE16_V:
+ case RISCV::VSSE16_V:
EEW = 16;
break;
case RISCV::VLE32_V:
case RISCV::VSE32_V:
+ case RISCV::VLSE32_V:
+ case RISCV::VSSE32_V:
EEW = 32;
break;
case RISCV::VLE64_V:
case RISCV::VSE64_V:
+ case RISCV::VLSE64_V:
+ case RISCV::VSSE64_V:
EEW = 64;
break;
default:
- llvm_unreachable("Opcode is not a vector unit stride load nor store");
+ llvm_unreachable("Could not determine EEW from Opcode");
}
auto EMUL = RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW);
@@ -218,6 +225,18 @@ getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL,
return std::make_pair(EEW, *EMUL);
}
+bool opcodeHasEEWAndEMULInfo(unsigned short Opcode) {
+ return Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V ||
+ Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V ||
+ Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V ||
+ Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V ||
+ Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V ||
+ Opcode == RISCV::VLSE8_V || Opcode == RISCV::VSSE8_V ||
+ Opcode == RISCV::VLSE16_V || Opcode == RISCV::VSSE16_V ||
+ Opcode == RISCV::VLSE32_V || Opcode == RISCV::VSSE32_V ||
+ Opcode == RISCV::VLSE64_V || Opcode == RISCV::VSSE64_V;
+}
+
unsigned RISCVInstrumentManager::getSchedClassID(
const MCInstrInfo &MCII, const MCInst &MCI,
const llvm::SmallVector<Instrument *> &IVec) const {
@@ -249,13 +268,9 @@ unsigned RISCVInstrumentManager::getSchedClassID(
uint8_t SEW = SI ? SI->getSEW() : 0;
const RISCVVInversePseudosTable::PseudoInfo *RVV = nullptr;
- if (Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V ||
- Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V ||
- Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V ||
- Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V ||
- Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V) {
+ if (opcodeHasEEWAndEMULInfo(Opcode)) {
RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(LMUL);
- auto [EEW, EMUL] = getEEWAndEMULForUnitStrideLoadStore(Opcode, VLMUL, SEW);
+ auto [EEW, EMUL] = getEEWAndEMUL(Opcode, VLMUL, SEW);
RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, EMUL, EEW);
} else {
// Check if it depends on LMUL and SEW
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-store.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-store.s
new file mode 100644
index 00000000000000..2a3326884197d9
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-store.s
@@ -0,0 +1,427 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 < %s | FileCheck %s
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e8, mf4, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e8, mf2, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e8, m1, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e8, m2, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+
+vsetvli zero, zero, e8, m4, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+
+vsetvli zero, zero, e8, m8, tu, mu
+vlse8.v v1, (a1), a2
+
+vsetvli zero, zero, e16, mf8, tu, mu
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e16, mf4, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e16, mf2, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e16, m1, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e16, m2, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e16, m4, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+
+vsetvli zero, zero, e16, m8, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+
+vsetvli zero, zero, e32, mf8, tu, mu
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, mf4, tu, mu
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, mf2, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, m1, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, m2, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, m4, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, m8, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+
+vsetvli zero, zero, e64, mf8, tu, mu
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, mf4, tu, mu
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, mf2, tu, mu
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, m1, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, m2, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, m4, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, m8, tu, mu
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 120
+# CHECK-NEXT: Total Cycles: 6552
+# CHECK-NEXT: Total uOps: 120
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.02
+# CHECK-NEXT: IPC: 0.02
+# CHECK-NEXT: Block RThroughput: 6492.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: 1 11 9.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: 1 19 17.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: 1 35 33.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 67 65.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: 1 131 129.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 131 129.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 131 129.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: 1 259 257.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 259 257.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: 1 515 513.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: 1 259 257.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 131 129.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: 1 11 9.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: 1 19 17.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 35 33.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: 1 67 65.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: 1 131 129.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 131 129.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 131 129.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: 1 259 257.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 259 257.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: 1 131 129.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: 1 259 257.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 131 129.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: 1 11 9.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 19 17.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: 1 35 33.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 67 65.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: 1 131 129.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 131 129.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 131 129.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT: 1 67 65.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT: 1 131 129.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT: 1 259 257.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 131 129.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 11 9.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 11 9.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: 1 19 17.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 19 17.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: 1 35 33.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 35 33.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e64, m8, tu, mu
+# CHECK-NEXT: 1 67 65.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 67 65.00 * vlse64.v v1, (a1), a2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SiFive7FDiv
+# CHECK-NEXT: [1] - SiFive7IDiv
+# CHECK-NEXT: [2] - SiFive7PipeA
+# CHECK-NEXT: [3] - SiFive7PipeB
+# CHECK-NEXT: [4] - SiFive7VA
+# CHECK-NEXT: [5] - SiFive7VCQ
+# CHECK-NEXT: [6] - SiFive7VL
+# CHECK-NEXT: [7] - SiFive7VS
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
+# CHECK-NEXT: - - 28.00 - - 92.00 6492.00 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: - - - - - 1.00 257.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 257.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: - - - - - 1.00 513.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: - - - - - 1.00 257.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: - - - - - 1.00 257.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 257.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: - - - - - 1.00 257.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT: - - - - - 1.00 257.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 129.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 9.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 17.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 33.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e64, m8, tu, mu
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - - 1.00 65.00 - vlse64.v v1, (a1), a2
More information about the llvm-commits
mailing list