[llvm] [RISCV][llvm-mca] Use correct LMUL and SEW for strided loads and stores (PR #76869)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 3 14:13:55 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Michael Maitland (michaelmaitland)

<details>
<summary>Changes</summary>

The pseudos for strided loads and stores use the SEW encoded in the instruction name. For example, vlse8 has SEW=8 and vlse16 has SEW=16.

When llvm-mca tries to look up (VLSE8_V, SEW=S, LMUL=L) in the inverse pseudo table, where S and L were set by the previous vsetvli instruction, a result will only be found when S=8. Instead, for a match to be found, we must look up (VLSE8_V, SEW=8, LMUL=L'), where L' is the EMUL, calculated by scaling the LMUL from the previous vsetvli by the ratio of the instruction's EEW (here 8) to that vsetvli's SEW.

---

Patch is 29.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76869.diff


2 Files Affected:

- (modified) llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp (+24-9) 
- (added) llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-store.s (+428) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index aba2511959af03..8d97c5ffd20a05 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -186,30 +186,37 @@ RISCVInstrumentManager::createInstruments(const MCInst &Inst) {
 }
 
 static std::pair<uint8_t, uint8_t>
-getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL,
-                                    uint8_t SEW) {
+getEEWAndEMUL(unsigned Opcode, RISCVII::VLMUL LMUL, uint8_t SEW) {
   uint8_t EEW;
   switch (Opcode) {
   case RISCV::VLM_V:
   case RISCV::VSM_V:
   case RISCV::VLE8_V:
   case RISCV::VSE8_V:
+  case RISCV::VLSE8_V:
+  case RISCV::VSSE8_V:
     EEW = 8;
     break;
   case RISCV::VLE16_V:
   case RISCV::VSE16_V:
+  case RISCV::VLSE16_V:
+  case RISCV::VSSE16_V:
     EEW = 16;
     break;
   case RISCV::VLE32_V:
   case RISCV::VSE32_V:
+  case RISCV::VLSE32_V:
+  case RISCV::VSSE32_V:
     EEW = 32;
     break;
   case RISCV::VLE64_V:
   case RISCV::VSE64_V:
+  case RISCV::VLSE64_V:
+  case RISCV::VSSE64_V:
     EEW = 64;
     break;
   default:
-    llvm_unreachable("Opcode is not a vector unit stride load nor store");
+    llvm_unreachable("Could not determine EEW from Opcode");
   }
 
   auto EMUL = RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW);
@@ -218,6 +225,18 @@ getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL,
   return std::make_pair(EEW, *EMUL);
 }
 
+bool opcodeHasEEWAndEMULInfo(unsigned short Opcode) {
+  return Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V ||
+         Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V ||
+         Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V ||
+         Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V ||
+         Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V ||
+         Opcode == RISCV::VLSE8_V || Opcode == RISCV::VSSE8_V ||
+         Opcode == RISCV::VLSE16_V || Opcode == RISCV::VSSE16_V ||
+         Opcode == RISCV::VLSE32_V || Opcode == RISCV::VSSE32_V ||
+         Opcode == RISCV::VLSE64_V || Opcode == RISCV::VSSE64_V;
+}
+
 unsigned RISCVInstrumentManager::getSchedClassID(
     const MCInstrInfo &MCII, const MCInst &MCI,
     const llvm::SmallVector<Instrument *> &IVec) const {
@@ -249,13 +268,9 @@ unsigned RISCVInstrumentManager::getSchedClassID(
   uint8_t SEW = SI ? SI->getSEW() : 0;
 
   const RISCVVInversePseudosTable::PseudoInfo *RVV = nullptr;
-  if (Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V ||
-      Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V ||
-      Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V ||
-      Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V ||
-      Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V) {
+  if (opcodeHasEEWAndEMULInfo(Opcode)) {
     RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(LMUL);
-    auto [EEW, EMUL] = getEEWAndEMULForUnitStrideLoadStore(Opcode, VLMUL, SEW);
+    auto [EEW, EMUL] = getEEWAndEMUL(Opcode, VLMUL, SEW);
     RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, EMUL, EEW);
   } else {
     // Check if it depends on LMUL and SEW
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-store.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-store.s
new file mode 100644
index 00000000000000..13350b954f77c6
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-store.s
@@ -0,0 +1,428 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 < %s | FileCheck %s
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e8, mf4, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e8, mf2, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e8, m1, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e8, m2, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+
+vsetvli zero, zero, e8, m4, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+
+vsetvli zero, zero, e8, m8, tu, mu
+vlse8.v  v1, (a1), a2
+
+vsetvli zero, zero, e16, mf8, tu, mu
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e16, mf4, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e16, mf2, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e16, m1, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e16, m2, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e16, m4, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+
+vsetvli zero, zero, e16, m8, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+
+vsetvli zero, zero, e32, mf8, tu, mu
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, mf4, tu, mu
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, mf2, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, m1, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, m2, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, m4, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e32, m8, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+
+vsetvli zero, zero, e64, mf8, tu, mu
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, mf4, tu, mu
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, mf2, tu, mu
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, m1, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, m2, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, m4, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vsetvli zero, zero, e64, m8, tu, mu
+vlse8.v  v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      120
+# CHECK-NEXT: Total Cycles:      6552
+# CHECK-NEXT: Total uOps:        120
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.02
+# CHECK-NEXT: IPC:               0.02
+# CHECK-NEXT: Block RThroughput: 6492.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      11    9.00    *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      19    17.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      35    33.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      67    65.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  1      131   129.00  *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      131   129.00  *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      131   129.00  *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  1      259   257.00  *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      259   257.00  *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m8, tu, mu
+# CHECK-NEXT:  1      515   513.00  *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT:  1      259   257.00  *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      131   129.00  *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      11    9.00    *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      19    17.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      35    33.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      67    65.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  1      131   129.00  *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      131   129.00  *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      131   129.00  *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  1      259   257.00  *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      259   257.00  *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT:  1      131   129.00  *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT:  1      259   257.00  *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      131   129.00  *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      11    9.00    *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      19    17.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      35    33.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      67    65.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      131   129.00  *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      131   129.00  *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      131   129.00  *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT:  1      67    65.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT:  1      131   129.00  *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT:  1      259   257.00  *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      131   129.00  *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      11    9.00    *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      11    9.00    *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      19    17.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      19    17.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      35    33.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      35    33.00   *                   vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      67    65.00   *                   vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  1      67    65.00   *                   vlse64.v	v1, (a1), a2
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - SiFive7FDiv
+# CHECK-NEXT: [1]   - SiFive7IDiv
+# CHECK-NEXT: [2]   - SiFive7Mem
+# CHECK-NEXT: [3]   - SiFive7PipeA
+# CHECK-NEXT: [4]   - SiFive7PipeB
+# CHECK-NEXT: [5]   - SiFive7VA
+# CHECK-NEXT: [6]   - SiFive7VCQ
+# CHECK-NEXT: [7]   - SiFive7VL
+# CHECK-NEXT: [8]   - SiFive7VS
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]
+# CHECK-NEXT:  -      -      -     28.00   -      -     92.00  6492.00  -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    Instructions:
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   9.00    -     vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   9.00    -     vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   9.00    -     vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   9.00    -     vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   17.00   -     vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   17.00   -     vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   17.00   -     vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   17.00   -     vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   33.00   -     vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   33.00   -     vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   33.00   -     vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   33.00   -     vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   65.00   -     vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   65.00   -     vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   65.00   -     vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   65.00   -     vlse64.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   129.00  -     vlse8.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   129.00  -     vlse16.v	v1, (a1), a2
+# CHECK-NEXT:  -      -      -      -      -      -     1.00   129.00  -     vlse32.v	v1, (a1), a2
+# CHECK-NEXT:  - ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/76869


More information about the llvm-commits mailing list