[llvm] [RISCV][llvm-mca] Vector Unit Stride Loads and stores use EEW and EMU… (PR #69409)

Michael Maitland via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 19 11:02:21 PDT 2023


https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/69409

>From 1a5d4a8c43f2344d184bfcc0aeb1eb9e46c06081 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Oct 2023 18:28:41 -0700
Subject: [PATCH 1/4] [RISCV][llvm-mca] Vector Unit Stride Loads and stores use
 EEW and EMUL based on instruction EEW

The EEW and EMUL of vector unit-stride loads and stores depend on the EEW
given in the instruction name. llvm-mca needs some help to correctly report
this information.
---
 .../Target/RISCV/MCA/RISCVCustomBehaviour.cpp |  127 +-
 llvm/test/tools/llvm-mca/RISCV/vle-vse.s      | 1249 +++++++++++++++++
 2 files changed, 1370 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/tools/llvm-mca/RISCV/vle-vse.s

diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index 16f9c113e29a915..17eda5e68f796d5 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -17,6 +17,8 @@
 #include "TargetInfo/RISCVTargetInfo.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/Debug.h"
+#include <numeric>
+#include <set>
 
 #define DEBUG_TYPE "llvm-mca-riscv-custombehaviour"
 
@@ -185,6 +187,109 @@ RISCVInstrumentManager::createInstruments(const MCInst &Inst) {
   return SmallVector<UniqueInstrument>();
 }
 
+/// Return EMUL = (EEW / SEW) * LMUL as a {magnitude, is-fractional} pair.
+inline static std::pair<unsigned, bool>
+getEMULEqualsEEWDivSEWTimesLMUL(unsigned EEW, unsigned SEW,
+                                RISCVII::VLMUL VLMUL) {
+  // Reduce (EEW/SEW)*LMUL to Num/Denom via GCD (a fractional LMUL scales
+  // Denom). Yields {Denom, true} if Denom > Num, else {Num, false}.
+  auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
+  unsigned Num = EEW, Denom = SEW; // Unreduced EEW/SEW.
+  int GCD =
+      Fractional ? std::gcd(Num, Denom * LMUL) : std::gcd(Num * LMUL, Denom);
+  Num = Fractional ? Num / GCD : Num * LMUL / GCD;
+  Denom = Fractional ? Denom * LMUL / GCD : Denom / GCD;
+  return std::make_pair(Num > Denom ? Num : Denom, Denom > Num);
+}
+
+static std::pair<uint8_t, uint8_t>
+getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, uint8_t LMUL,
+                                    uint8_t SEW) {
+  uint8_t EEW; // Fixed per opcode below; VLM_V/VSM_V share EEW = 8.
+  switch (Opcode) {
+  case RISCV::VLM_V:
+  case RISCV::VSM_V:
+  case RISCV::VLE8_V:
+  case RISCV::VSE8_V:
+    EEW = 8;
+    break;
+  case RISCV::VLE16_V:
+  case RISCV::VSE16_V:
+    EEW = 16;
+    break;
+  case RISCV::VLE32_V:
+  case RISCV::VSE32_V:
+    EEW = 32;
+    break;
+  case RISCV::VLE64_V:
+  case RISCV::VSE64_V:
+    EEW = 64;
+    break;
+  default:
+    llvm_unreachable("Opcode is not a vector unit stride load nor store");
+  }
+
+  RISCVII::VLMUL VLMUL; // LMUL re-expressed as the RISCVII::VLMUL enum.
+  switch (LMUL) {
+  case 0b000:
+    VLMUL = RISCVII::LMUL_1;
+    break;
+  case 0b001:
+    VLMUL = RISCVII::LMUL_2;
+    break;
+  case 0b010:
+    VLMUL = RISCVII::LMUL_4;
+    break;
+  case 0b011:
+    VLMUL = RISCVII::LMUL_8;
+    break;
+  case 0b111:
+    VLMUL = RISCVII::LMUL_F2;
+    break;
+  case 0b110:
+    VLMUL = RISCVII::LMUL_F4;
+    break;
+  case 0b101:
+    VLMUL = RISCVII::LMUL_F8;
+    break;
+  case RISCVII::LMUL_RESERVED:
+    llvm_unreachable("LMUL cannot be LMUL_RESERVED");
+  } // NOTE(review): no default; an unlisted LMUL value leaves VLMUL unset.
+
+  auto [EMULPart, Fractional] =
+      getEMULEqualsEEWDivSEWTimesLMUL(EEW, SEW, VLMUL);
+  assert(RISCVVType::isValidLMUL(EMULPart, Fractional) &&
+         "Unexpected EEW from instruction used with LMUL and SEW");
+
+  uint8_t EMUL; // 3-bit encoding of EMUL, mirroring the LMUL cases above.
+  switch (RISCVVType::encodeLMUL(EMULPart, Fractional)) {
+  case RISCVII::LMUL_1:
+    EMUL = 0b000;
+    break;
+  case RISCVII::LMUL_2:
+    EMUL = 0b001;
+    break;
+  case RISCVII::LMUL_4:
+    EMUL = 0b010;
+    break;
+  case RISCVII::LMUL_8:
+    EMUL = 0b011;
+    break;
+  case RISCVII::LMUL_F2:
+    EMUL = 0b111;
+    break;
+  case RISCVII::LMUL_F4:
+    EMUL = 0b110;
+    break;
+  case RISCVII::LMUL_F8:
+    EMUL = 0b101;
+    break;
+  case RISCVII::LMUL_RESERVED:
+    llvm_unreachable("Cannot create instrument for LMUL_RESERVED");
+  }
+  return std::make_pair(EEW, EMUL); // {EEW, encoded EMUL}
+}
+
 unsigned RISCVInstrumentManager::getSchedClassID(
     const MCInstrInfo &MCII, const MCInst &MCI,
     const llvm::SmallVector<Instrument *> &IVec) const {
@@ -214,12 +319,22 @@ unsigned RISCVInstrumentManager::getSchedClassID(
   // or (Opcode, LMUL, SEW) if SEW instrument is active, and depends on LMUL
   // and SEW, or (Opcode, LMUL, 0) if does not depend on SEW.
   uint8_t SEW = SI ? SI->getSEW() : 0;
-  // Check if it depends on LMUL and SEW
-  const RISCVVInversePseudosTable::PseudoInfo *RVV =
-      RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, SEW);
-  // Check if it depends only on LMUL
-  if (!RVV)
-    RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, 0);
+
+  const RISCVVInversePseudosTable::PseudoInfo *RVV = nullptr;
+  if (Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V ||
+      Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V ||
+      Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V ||
+      Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V ||
+      Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V) {
+    auto [EEW, EMUL] = getEEWAndEMULForUnitStrideLoadStore(Opcode, LMUL, SEW);
+    RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, EMUL, EEW);
+  } else {
+    // Check if it depends on LMUL and SEW
+    RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, SEW);
+    // Check if it depends only on LMUL
+    if (!RVV)
+      RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, 0);
+  }
 
   // Not a RVV instr
   if (!RVV) {
diff --git a/llvm/test/tools/llvm-mca/RISCV/vle-vse.s b/llvm/test/tools/llvm-mca/RISCV/vle-vse.s
new file mode 100644
index 000000000000000..15b8f854c587657
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/vle-vse.s
@@ -0,0 +1,1249 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 < %s | FileCheck %s
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e8, m8, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vle8.v    v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vle8.v    v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vle16.v    v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vle16.v    v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vle32.v    v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vle32.v    v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vle64.v    v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vle64.v    v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e8, m8, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vse8.v    v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vse8.v    v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vse16.v    v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vse16.v    v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vse32.v    v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vse32.v    v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vse64.v    v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vse64.v    v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e8, m8, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vlm.v    v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vlm.v    v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e8, m8, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vsm.v    v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vsm.v    v1, (a0)
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      1084
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.37
+# CHECK-NEXT: IPC:               0.37
+# CHECK-NEXT: Block RThroughput: 848.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m8, tu, mu
+# CHECK-NEXT:  1      4     16.00   *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  1      4     16.00   *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  1      4     16.00   *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  1      4     16.00   *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  1      4     16.00   *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      4     16.00   *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      4     1.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      4     16.00   *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      4     16.00   *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      4     16.00   *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      4     4.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      4     8.00    *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      4     16.00   *                   vle64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m8, tu, mu
+# CHECK-NEXT:  1      1     16.00          *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse8.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  1      1     16.00          *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  1      1     16.00          *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse16.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  1      1     16.00          *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  1      1     16.00          *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      1     16.00          *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      1     1.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse32.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      1     16.00          *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      1     16.00          *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      1     16.00          *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      1     4.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      1     8.00           *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      1     16.00          *            vse64.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m8, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      4     2.00    *                   vlm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e8, m8, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+# CHECK-NEXT:  1      3     1.00                  U     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      1     2.00           *            vsm.v	v1, (a0)
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - SiFive7FDiv
+# CHECK-NEXT: [1]   - SiFive7IDiv
+# CHECK-NEXT: [2]   - SiFive7PipeA
+# CHECK-NEXT: [3]   - SiFive7PipeB
+# CHECK-NEXT: [4]   - SiFive7PipeV
+# CHECK-NEXT: [5]   - SiFive7VA
+# CHECK-NEXT: [6]   - SiFive7VL
+# CHECK-NEXT: [7]   - SiFive7VS
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
+# CHECK-NEXT:  -      -     200.00  -     848.00  -     424.00 424.00
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -     16.00   -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -     16.00   -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -     16.00   -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -     16.00   -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -     16.00   -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -     16.00   -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -     16.00   -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -     16.00   -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -     16.00   -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -     4.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -     8.00    -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -     16.00   -     vle64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -      -     16.00  vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse8.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -      -     16.00  vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -      -     16.00  vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse16.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -      -     16.00  vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -      -     16.00  vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -      -     16.00  vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse32.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -      -     16.00  vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -      -     16.00  vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -      -     16.00  vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     4.00    -      -     4.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     8.00   vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     16.00   -      -     16.00  vse64.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -     vlm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e8, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     2.00   vsm.v	v1, (a0)

>From a3ef16761bd5eb0ef18b29079da4ab12c1ad36d4 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Oct 2023 20:05:43 -0700
Subject: [PATCH 2/4] Use static cast for LMUL to/from VLMUL

---
 .../Target/RISCV/MCA/RISCVCustomBehaviour.cpp | 56 +------------------
 1 file changed, 3 insertions(+), 53 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index 17eda5e68f796d5..b334415e13d1ae3 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -229,64 +229,14 @@ getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, uint8_t LMUL,
     llvm_unreachable("Opcode is not a vector unit stride load nor store");
   }
 
-  RISCVII::VLMUL VLMUL;
-  switch (LMUL) {
-  case 0b000:
-    VLMUL = RISCVII::LMUL_1;
-    break;
-  case 0b001:
-    VLMUL = RISCVII::LMUL_2;
-    break;
-  case 0b010:
-    VLMUL = RISCVII::LMUL_4;
-    break;
-  case 0b011:
-    VLMUL = RISCVII::LMUL_8;
-    break;
-  case 0b111:
-    VLMUL = RISCVII::LMUL_F2;
-    break;
-  case 0b110:
-    VLMUL = RISCVII::LMUL_F4;
-    break;
-  case 0b101:
-    VLMUL = RISCVII::LMUL_F8;
-    break;
-  case RISCVII::LMUL_RESERVED:
-    llvm_unreachable("LMUL cannot be LMUL_RESERVED");
-  }
-
+  RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(LMUL);
   auto [EMULPart, Fractional] =
       getEMULEqualsEEWDivSEWTimesLMUL(EEW, SEW, VLMUL);
   assert(RISCVVType::isValidLMUL(EMULPart, Fractional) &&
          "Unexpected EEW from instruction used with LMUL and SEW");
 
-  uint8_t EMUL;
-  switch (RISCVVType::encodeLMUL(EMULPart, Fractional)) {
-  case RISCVII::LMUL_1:
-    EMUL = 0b000;
-    break;
-  case RISCVII::LMUL_2:
-    EMUL = 0b001;
-    break;
-  case RISCVII::LMUL_4:
-    EMUL = 0b010;
-    break;
-  case RISCVII::LMUL_8:
-    EMUL = 0b011;
-    break;
-  case RISCVII::LMUL_F2:
-    EMUL = 0b111;
-    break;
-  case RISCVII::LMUL_F4:
-    EMUL = 0b110;
-    break;
-  case RISCVII::LMUL_F8:
-    EMUL = 0b101;
-    break;
-  case RISCVII::LMUL_RESERVED:
-    llvm_unreachable("Cannot create instrument for LMUL_RESERVED");
-  }
+  uint8_t EMUL =
+      static_cast<RISCVII::VLMUL>(RISCVVType::encodeLMUL(EMULPart, Fractional));
   return std::make_pair(EEW, EMUL);
 }
 

>From 76decd1a42d818fa2fa074c2a37ffa61db153ee2 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 18 Oct 2023 11:13:29 -0700
Subject: [PATCH 3/4] Don't use GCD to calculate EMUL

---
 .../Target/RISCV/MCA/RISCVCustomBehaviour.cpp | 42 ++++++++++---------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index b334415e13d1ae3..f70192435b80a87 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -17,8 +17,6 @@
 #include "TargetInfo/RISCVTargetInfo.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/Debug.h"
-#include <numeric>
-#include <set>
 
 #define DEBUG_TYPE "llvm-mca-riscv-custombehaviour"
 
@@ -188,18 +186,29 @@ RISCVInstrumentManager::createInstruments(const MCInst &Inst) {
 }
 
 /// Return EMUL = (EEW / SEW) * LMUL
-inline static std::pair<unsigned, bool>
+inline static RISCVII::VLMUL
 getEMULEqualsEEWDivSEWTimesLMUL(unsigned EEW, unsigned SEW,
                                 RISCVII::VLMUL VLMUL) {
-  // Calculate (EEW/SEW)*LMUL preserving fractions less than 1. Use GCD
-  // to put fraction in simplest form.
-  auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
-  unsigned Num = EEW, Denom = SEW;
-  int GCD =
-      Fractional ? std::gcd(Num, Denom * LMUL) : std::gcd(Num * LMUL, Denom);
-  Num = Fractional ? Num / GCD : Num * LMUL / GCD;
-  Denom = Fractional ? Denom * LMUL / GCD : Denom / GCD;
-  return std::make_pair(Num > Denom ? Num : Denom, Denom > Num);
+  bool IsScaleFrac = EEW < SEW;
+  unsigned Scale = IsScaleFrac ? SEW / EEW : EEW / SEW;
+  auto [LMUL, IsLMULFrac] = RISCVVType::decodeVLMUL(VLMUL);
+
+  unsigned EMUL;
+  bool EMULFrac;
+  if ((IsScaleFrac && IsLMULFrac) || (!IsScaleFrac && !IsLMULFrac)) {
+    EMUL = LMUL * Scale;
+    EMULFrac = IsLMULFrac;
+  } else if (Scale > LMUL) {
+    EMUL = Scale / LMUL;
+    EMULFrac = IsScaleFrac;
+  } else {
+    EMUL = LMUL / Scale;
+    EMULFrac = IsLMULFrac;
+  }
+  if (EMUL == 1)
+    EMULFrac = false;
+
+  return RISCVVType::encodeLMUL(EMUL, EMULFrac);
 }
 
 static std::pair<uint8_t, uint8_t>
@@ -230,13 +239,8 @@ getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, uint8_t LMUL,
   }
 
   RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(LMUL);
-  auto [EMULPart, Fractional] =
-      getEMULEqualsEEWDivSEWTimesLMUL(EEW, SEW, VLMUL);
-  assert(RISCVVType::isValidLMUL(EMULPart, Fractional) &&
-         "Unexpected EEW from instruction used with LMUL and SEW");
-
-  uint8_t EMUL =
-      static_cast<RISCVII::VLMUL>(RISCVVType::encodeLMUL(EMULPart, Fractional));
+  uint8_t EMUL = static_cast<RISCVII::VLMUL>(
+      getEMULEqualsEEWDivSEWTimesLMUL(EEW, SEW, VLMUL));
   return std::make_pair(EEW, EMUL);
 }
 

>From 90bfd7ac5a129975ac35de223d0e7decf67598dc Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 19 Oct 2023 09:21:10 -0700
Subject: [PATCH 4/4] Use getSameRatioLMUL

---
 .../Target/RISCV/MCA/RISCVCustomBehaviour.cpp | 37 +++----------------
 1 file changed, 6 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index f70192435b80a87..aadf31eae341ed3 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -185,34 +185,8 @@ RISCVInstrumentManager::createInstruments(const MCInst &Inst) {
   return SmallVector<UniqueInstrument>();
 }
 
-/// Return EMUL = (EEW / SEW) * LMUL
-inline static RISCVII::VLMUL
-getEMULEqualsEEWDivSEWTimesLMUL(unsigned EEW, unsigned SEW,
-                                RISCVII::VLMUL VLMUL) {
-  bool IsScaleFrac = EEW < SEW;
-  unsigned Scale = IsScaleFrac ? SEW / EEW : EEW / SEW;
-  auto [LMUL, IsLMULFrac] = RISCVVType::decodeVLMUL(VLMUL);
-
-  unsigned EMUL;
-  bool EMULFrac;
-  if ((IsScaleFrac && IsLMULFrac) || (!IsScaleFrac && !IsLMULFrac)) {
-    EMUL = LMUL * Scale;
-    EMULFrac = IsLMULFrac;
-  } else if (Scale > LMUL) {
-    EMUL = Scale / LMUL;
-    EMULFrac = IsScaleFrac;
-  } else {
-    EMUL = LMUL / Scale;
-    EMULFrac = IsLMULFrac;
-  }
-  if (EMUL == 1)
-    EMULFrac = false;
-
-  return RISCVVType::encodeLMUL(EMUL, EMULFrac);
-}
-
 static std::pair<uint8_t, uint8_t>
-getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, uint8_t LMUL,
+getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL,
                                     uint8_t SEW) {
   uint8_t EEW;
   switch (Opcode) {
@@ -238,9 +212,8 @@ getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, uint8_t LMUL,
     llvm_unreachable("Opcode is not a vector unit stride load nor store");
   }
 
-  RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(LMUL);
-  uint8_t EMUL = static_cast<RISCVII::VLMUL>(
-      getEMULEqualsEEWDivSEWTimesLMUL(EEW, SEW, VLMUL));
+  uint8_t EMUL =
+      static_cast<uint8_t>(RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW));
   return std::make_pair(EEW, EMUL);
 }
 
@@ -280,7 +253,9 @@ unsigned RISCVInstrumentManager::getSchedClassID(
       Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V ||
       Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V ||
       Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V) {
-    auto [EEW, EMUL] = getEEWAndEMULForUnitStrideLoadStore(Opcode, LMUL, SEW);
+
+    RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(LMUL);
+    auto [EEW, EMUL] = getEEWAndEMULForUnitStrideLoadStore(Opcode, VLMUL, SEW);
     RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, EMUL, EEW);
   } else {
     // Check if it depends on LMUL and SEW



More information about the llvm-commits mailing list