[llvm] 84e95be - [RISCV] Update SiFive P600's scheduling model on RVV instructions (#115243)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 12 15:29:44 PST 2024
Author: Min-Yih Hsu
Date: 2024-11-12T15:29:40-08:00
New Revision: 84e95beae980466ffcc555297e0e34d23fca8a76
URL: https://github.com/llvm/llvm-project/commit/84e95beae980466ffcc555297e0e34d23fca8a76
DIFF: https://github.com/llvm/llvm-project/commit/84e95beae980466ffcc555297e0e34d23fca8a76.diff
LOG: [RISCV] Update SiFive P600's scheduling model on RVV instructions (#115243)
The biggest change is assigning vector crypto instructions to the
correct processor resource.
The majority of these changes are guided by our RVV-capable
llvm-exegesis.
Added:
llvm/test/tools/llvm-mca/RISCV/SiFiveP600/mask.s
llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s
Modified:
llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s
llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s
llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s
llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s
llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s
llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index 0543b999fd647d..c2d93d4c0a7f0a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -50,6 +50,240 @@ class SiFiveP600GetCyclesSegmented<string mx, int sew, int nf> {
int c = !mul(VLUpperBound, nf);
}
+class SiFiveP600VSM3CCycles<string mx> {
+ // c = ceil(LMUL / 2)
+ int c = !cond(!eq(mx, "M2") : 1,
+ !eq(mx, "M4") : 2,
+ !eq(mx, "M8") : 4,
+ true : 1);
+}
+
+class SiFiveP600RVVMultiplier<string mx> {
+ int c = !if(!eq(mx, "M8"), 2, 1);
+}
+
+// ======================================================================
+// The latency and occupancy data in this section are primarily evaluated
+// from llvm-exegesis.
+// ======================================================================
+
+class SiFiveP600VCryptoLatency<string mx> {
+ int c = !cond(
+ !eq(mx, "M4"): 4,
+ !eq(mx, "M8"): 8,
+ true: 2
+ );
+}
+
+class SiFiveP600VFMinMaxReduction<string mx, int sew> {
+ defvar E64Lat = !cond(
+ !eq(mx, "M1") : 4,
+ !eq(mx, "M2") : 6,
+ !eq(mx, "M4") : 8,
+ !eq(mx, "M8") : 10,
+ true: 2
+ );
+
+ defvar E64Cycles = !cond(
+ !eq(mx, "M1") : 3,
+ !eq(mx, "M2") : 4,
+ !eq(mx, "M4") : 5,
+ !eq(mx, "M8") : 6,
+ true: 2
+ );
+
+ int latency = !if(!eq(sew, 64), E64Lat, !add(E64Lat, 2));
+ int cycles = !if(!eq(sew, 64), E64Cycles, !add(E64Cycles, 1));
+}
+
+class SiFiveP600VFUnorderedReduction<string mx, int sew> {
+ defvar E64Lat = !cond(
+ !eq(mx, "M1") : 6,
+ !eq(mx, "M2") : 12,
+ !eq(mx, "M4") : 18,
+ !eq(mx, "M8") : 24,
+ true: 2
+ );
+
+ defvar E32Cycles = !cond(
+ !eq(mx, "M1") : 10,
+ !eq(mx, "M2") : 10,
+ !eq(mx, "M4") : 11,
+ !eq(mx, "M8") : 11,
+ true: 6
+ );
+
+ int latency = !if(!eq(sew, 64), E64Lat, !add(E64Lat, 4));
+ int cycles = !if(!eq(sew, 64), 6, E32Cycles);
+}
+
+class SiFiveP600VFWidenUnorderedReduction<string mx> {
+ int latency = !cond(
+ !eq(mx, "M1") : 10,
+ !eq(mx, "M2") : 18,
+ !eq(mx, "M4") : 24,
+ !eq(mx, "M8") : 30,
+ true: 6
+ );
+}
+
+class SiFiveP600VFOrderedReduction<string mx, int sew> {
+ defvar Base = !if(!eq(sew, 64), 6, 10);
+ int c = !cond(
+ !eq(mx, "M1") : Base,
+ !eq(mx, "M2") : !mul(Base, 2),
+ !eq(mx, "M4") : !mul(Base, 4),
+ !eq(mx, "M8") : !mul(Base, 8),
+ true: 6
+ );
+}
+
+class SiFiveP600VIReductionLatency<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 4,
+ !eq(mx, "M4") : 8,
+ !eq(mx, "M8") : 16,
+ // M1 and lower
+ true: 2
+ );
+}
+
+class SiFiveP600VIMinMaxReductionLatency<string mx, int sew> {
+ // +-----+-----+-----+-----+----+
+ // | | E64 | E32 | E16 | E8 |
+ // +-----+-----+-----+-----+----+
+ // | MF8 | X | X | X | 4 |
+ // +-----+-----+-----+-----+----+
+ // | MF4 | X | X | 4 | 6 |
+ // +-----+-----+-----+-----+----+
+ // | MF2 | X | 4 | 6 | 8 |
+ // +-----+-----+-----+-----+----+
+ // | M1 | 4 | 6 | 8 | 10 |
+ // +-----+-----+-----+-----+----+
+ // | M2 | 6 | 8 | 10 | 12 |
+ // +-----+-----+-----+-----+----+
+ // | M4 | 8 | 10 | 12 | 14 |
+ // +-----+-----+-----+-----+----+
+ // | M8 | 10 | 12 | 14 | 16 |
+ // +-----+-----+-----+-----+----+
+ defvar BaseIndex = !cond(
+ !eq(sew, 64): 0,
+ !eq(sew, 32): 1,
+ !eq(sew, 16): 2,
+ !eq(sew, 8): 3
+ );
+
+ defvar Latencies = [4, 6, 8, 10, 12, 14, 16];
+
+ int c = !cond(
+ !eq(mx, "M1") : Latencies[BaseIndex],
+ !eq(mx, "M2") : Latencies[!add(BaseIndex, 1)],
+ !eq(mx, "M4") : Latencies[!add(BaseIndex, 2)],
+ !eq(mx, "M8") : Latencies[!add(BaseIndex, 3)],
+ // Fractional
+ !eq(mx, "MF2"): Latencies[!sub(BaseIndex, 1)],
+ !eq(mx, "MF4"): Latencies[!sub(BaseIndex, 2)],
+ !eq(mx, "MF8"): Latencies[!sub(BaseIndex, 3)],
+ );
+}
+
+class SiFiveP600VIMinMaxReductionCycles<string mx, int sew> {
+ // +-----+-----+-----+-----+----+
+ // | | E64 | E32 | E16 | E8 |
+ // +-----+-----+-----+-----+----+
+ // | MF8 | X | X | X | 3 |
+ // +-----+-----+-----+-----+----+
+ // | MF4 | X | X | 3 | 5 |
+ // +-----+-----+-----+-----+----+
+ // | MF2 | X | 3 | 5 | 6 |
+ // +-----+-----+-----+-----+----+
+ // | M1 | 3 | 4 | 6 | 8 |
+ // +-----+-----+-----+-----+----+
+ // | M2 | 4 | 5 | 8 | 9 |
+ // +-----+-----+-----+-----+----+
+ // | M4 | 5 | 6 | 10 | 11 |
+ // +-----+-----+-----+-----+----+
+ // | M8 | 7 | 8 | 9 | 11 |
+ // +-----+-----+-----+-----+----+
+ defvar Index = !cond(
+ !eq(sew, 64): 0,
+ !eq(sew, 32): 1,
+ !eq(sew, 16): 2,
+ !eq(sew, 8): 3
+ );
+
+ defvar Cycles = [
+ [0, 0, 0, 3],
+ [0, 0, 3, 5],
+ [0, 3, 5, 6],
+ [3, 4, 6, 8],
+ [4, 5, 8, 9],
+ [5, 6, 10, 11],
+ [7, 8, 9, 11]
+ ];
+
+ int c = !cond(
+ !eq(mx, "MF8"): Cycles[0][Index],
+ !eq(mx, "MF4"): Cycles[1][Index],
+ !eq(mx, "MF2"): Cycles[2][Index],
+ !eq(mx, "M1"): Cycles[3][Index],
+ !eq(mx, "M2"): Cycles[4][Index],
+ !eq(mx, "M4"): Cycles[5][Index],
+ !eq(mx, "M8"): Cycles[6][Index],
+ );
+}
+
+class SiFiveP600VSlide1<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 3,
+ !eq(mx, "M4") : 4,
+ !eq(mx, "M8") : 8,
+ // M1 and lower
+ true: 2
+ );
+}
+
+class SiFiveP600VSlideI<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 4,
+ !eq(mx, "M4") : 6,
+ !eq(mx, "M8") : 8,
+ // M1 and lower
+ true: 2
+ );
+}
+
+class SiFiveP600VSlideXComplex<string mx, bit isUp = false> {
+ int latency = !cond(
+ !eq(mx, "M2") : 11,
+ !eq(mx, "M4") : 14,
+ !eq(mx, "M8") : 20
+ );
+
+ int cycles = !cond(
+ !eq(mx, "M2") : !if(isUp, 10, 11),
+ !eq(mx, "M4") : !if(isUp, 12, 14),
+ !eq(mx, "M8") : !if(isUp, 16, 20)
+ );
+}
+
+class SiFiveP600VPermutationComplex<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 12,
+ !eq(mx, "M4") : 16,
+ !eq(mx, "M8") : 24
+ );
+}
+
+class SiFiveP600VSHA2MSCycles<string mx, int sew> {
+ int c = !cond(
+ !eq(mx, "M2") : !if(!eq(sew, 32), 2, 3),
+ !eq(mx, "M4") : !if(!eq(sew, 32), 4, 6),
+ !eq(mx, "M8") : !if(!eq(sew, 32), 8, 12),
+ true: 1
+ );
+}
+
// SiFiveP600 machine model for scheduling and other instruction cost heuristics.
def SiFiveP600Model : SchedMachineModel {
let IssueWidth = 4; // 4 micro-ops are dispatched per cycle.
@@ -95,6 +329,12 @@ def SiFiveP600FloatDiv : ProcResource<1>;
def SiFiveP600VEXQ0 : ProcResource<1>;
def SiFiveP600VEXQ1 : ProcResource<1>;
def SiFiveP600VectorArith : ProcResGroup<[SiFiveP600VEXQ0, SiFiveP600VEXQ1]>;
+
+// Only VEXQ0 has a mask unit.
+defvar SiFiveP600VectorMask = SiFiveP600VEXQ0;
+// Only VEXQ0 has vector crypto.
+defvar SiFiveP600VectorCrypto = SiFiveP600VEXQ0;
+
def SiFiveP600VLD : ProcResource<1>;
def SiFiveP600VST : ProcResource<1>;
def SiFiveP600VDiv : ProcResource<1>;
@@ -386,7 +626,7 @@ foreach LMul = [1, 2, 4, 8] in {
def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SiFiveP600VLD]>;
def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SiFiveP600VST]>;
}
- let Latency = LMul, ReleaseAtCycles = [LMul] in {
+ let Latency = 2, ReleaseAtCycles = [LMul] in {
def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SiFiveP600VectorArith]>;
}
}
@@ -395,37 +635,42 @@ foreach LMul = [1, 2, 4, 8] in {
foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 1, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVIALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ let Latency = 2, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVExtV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpI", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpV", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpX", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpI", [SiFiveP600VectorMask], mx, IsWorstCase>;
}
- let Latency = 6, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVShiftV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftI", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ let ReleaseAtCycles = [LMulLat] in {
+ let Latency = 6 in {
+ defm "" : LMULWriteResMX<"WriteVIMulV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ }
+
+ let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c) in {
+ defm "" : LMULWriteResMX<"WriteVIALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ }
}
}
// Widening
@@ -440,7 +685,26 @@ foreach mx = SchedMxListW in {
defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+
+ // Special case for variants with widen operands.
+ let ReleaseAtCycles = [!mul(LMulLat, 2)] in
+ def P600WriteVIWALUWidenOp_ # mx : SchedWriteRes<[SiFiveP600VectorArith]>;
}
+
+ defvar P600VIWALUBaseSchedRW = [!cast<SchedWrite>("P600WriteVIWALUWidenOp_" # mx),
+ !cast<SchedRead>("ReadVPassthru_" # mx),
+ !cast<SchedRead>("ReadVIALUV_" # mx),
+ !cast<SchedRead>("ReadVIALUV_" # mx)];
+
+ def : InstRW<P600VIWALUBaseSchedRW,
+ (instregex "^PseudoVW(ADD|SUB)[U]?_W(V|X)_" # mx # "$")>;
+ def : InstRW<P600VIWALUBaseSchedRW[0,2,3],
+ (instregex "^PseudoVW(ADD|SUB)[U]?_WV_" # mx # "_TIED$")>;
+
+ def : InstRW<!listconcat(P600VIWALUBaseSchedRW, [!cast<SchedRead>("ReadVMask")]),
+ (instregex "^PseudoVW(ADD|SUB)[U]?_W(V|X)_" # mx # "_MASK$")>;
+ def : InstRW<!listconcat(P600VIWALUBaseSchedRW[0,1,3], [!cast<SchedRead>("ReadVMask")]),
+ (instregex "^PseudoVW(ADD|SUB)[U]?_WV_" # mx # "_MASK_TIED$")>;
}
// Worst case needs 51/45/42/72 * lmul cycles for i8/16/32/64.
@@ -504,34 +768,34 @@ foreach mx = SchedMxListF in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
- let Latency = 2, ReleaseAtCycles = [LMulLat] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
}
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, isF=1>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList, isF=1>.c;
- let Latency = 1, ReleaseAtCycles = [LMulLat] in {
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
+ let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c), ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
+ let Latency = !if(!eq(mx, "M8"), 4, 3), ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
}
foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in
+ let Latency = !if(!eq(mx, "M8"), 4, 3), ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in
defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFiveP600VectorMask], mx, IsWorstCase>;
}
- let Latency = 1, ReleaseAtCycles = [LMulLat] in {
+ let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c),
+ ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in {
defm "" : LMULWriteResMX<"WriteVFClassV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMovV", [SiFiveP600VectorArith], mx, IsWorstCase>;
@@ -565,7 +829,31 @@ foreach mx = SchedMxListFW in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+
+ // Special case for variants with widen operands.
+ let ReleaseAtCycles = [!mul(LMulLat, 2)] in
+ def P600WriteVFWALUWidenOp_ # mx # _E # sew : SchedWriteRes<[SiFiveP600VectorArith]>;
}
+
+ defvar P600VFWALUBaseSchedRW = [!cast<SchedWrite>("P600WriteVFWALUWidenOp_" # mx # "_E" # sew),
+ !cast<SchedRead>("ReadVPassthru_" # mx # "_E" # sew),
+ !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)];
+
+ def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)]),
+ (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "$")>;
+ def : InstRW<[P600VFWALUBaseSchedRW[0], P600VFWALUBaseSchedRW[2], !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)],
+ (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_TIED$")>;
+
+ def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUF_" # mx # "_E" # sew)]),
+ (instregex "^PseudoVFW(ADD|SUB)_WFPR" # sew # "_" # mx # "_E" # sew # "$")>;
+
+ def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")]),
+ (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_MASK$")>;
+ def : InstRW<[P600VFWALUBaseSchedRW[0], P600VFWALUBaseSchedRW[1], !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")],
+ (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_MASK_TIED$")>;
+
+ def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUF_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")]),
+ (instregex "^PseudoVFW(ADD|SUB)_WFPR" # sew # "_" # mx # "_E" # sew # "_MASK$")>;
}
}
// Narrowing
@@ -580,7 +868,7 @@ foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in {
+ let Latency = 3, ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
@@ -608,9 +896,14 @@ foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- let Latency = !add(2, !mul(2, LMulLat)), ReleaseAtCycles = [LMulLat] in {
+
+ let ReleaseAtCycles = [LMulLat] in {
+ let Latency = SiFiveP600VIReductionLatency<mx>.c in
defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFiveP600VEXQ1],
mx, sew, IsWorstCase>;
+
+ let Latency = SiFiveP600VIMinMaxReductionLatency<mx, sew>.c,
+ ReleaseAtCycles = [SiFiveP600VIMinMaxReductionCycles<mx, sew>.c] in
defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFiveP600VEXQ1],
mx, sew, IsWorstCase>;
}
@@ -621,7 +914,7 @@ foreach mx = SchedMxListWRed in {
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
- let Latency = !add(2, !mul(2, LMulLat)), ReleaseAtCycles = [LMulLat] in {
+ let Latency = SiFiveP600VIReductionLatency<mx>.c, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFiveP600VEXQ1],
mx, sew, IsWorstCase>;
}
@@ -632,14 +925,20 @@ foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
- let Latency = !add(6, !mul(6, LMulLat)), ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFiveP600VEXQ1],
- mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From",
- [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFiveP600VEXQ1],
- mx, sew, IsWorstCase>;
- }
+ let Latency = SiFiveP600VFMinMaxReduction<mx, sew>.latency,
+ ReleaseAtCycles = [SiFiveP600VFMinMaxReduction<mx, sew>.cycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From",
+ [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
+
+ let Latency = SiFiveP600VFUnorderedReduction<mx, sew>.latency,
+ ReleaseAtCycles = [SiFiveP600VFUnorderedReduction<mx, sew>.cycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFiveP600VEXQ1],
+ mx, sew, IsWorstCase>;
+
+ let Latency = SiFiveP600VFOrderedReduction<mx, sew>.c,
+ ReleaseAtCycles = [SiFiveP600VFOrderedReduction<mx, sew>.c] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFiveP600VEXQ1],
+ mx, sew, IsWorstCase>;
}
}
@@ -647,28 +946,38 @@ foreach mx = SchedMxListFWRed in {
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
- let Latency = !add(6, !mul(6, LMulLat)), ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFiveP600VEXQ1],
- mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFiveP600VEXQ1],
- mx, sew, IsWorstCase>;
- }
+ let Latency = SiFiveP600VFWidenUnorderedReduction<mx>.latency,
+ ReleaseAtCycles = [6] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFiveP600VEXQ1],
+ mx, sew, IsWorstCase>;
+
+ let Latency = SiFiveP600VFOrderedReduction<mx, sew>.c,
+ ReleaseAtCycles = [SiFiveP600VFOrderedReduction<mx, sew>.c] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFiveP600VEXQ1],
+ mx, sew, IsWorstCase>;
}
}
// 15. Vector Mask Instructions
foreach mx = SchedMxList in {
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 1, ReleaseAtCycles = [1] in {
- defm "" : LMULWriteResMX<"WriteVMALUV", [SiFiveP600VEXQ0], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMPopV", [SiFiveP600VEXQ0], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFiveP600VEXQ0], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFiveP600VEXQ0], mx, IsWorstCase>;
+ let Latency = 2 in {
+ defm "" : LMULWriteResMX<"WriteVMALUV", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFiveP600VectorMask], mx, IsWorstCase>;
+
+ let ReleaseAtCycles = [2] in {
+ defm "" : LMULWriteResMX<"WriteVMPopV", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ }
}
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- let Latency = 1, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVIotaV", [SiFiveP600VEXQ0], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIdxV", [SiFiveP600VEXQ0], mx, IsWorstCase>;
+ let ReleaseAtCycles = [LMulLat] in {
+ let Latency = 2 in
+ defm "" : LMULWriteResMX<"WriteVIotaV", [SiFiveP600VectorMask], mx, IsWorstCase>;
+
+ // vid.v isn't executed by the mask unit.
+ let Latency = !if(!eq(mx, "M8"), 4, !if(!eq(mx, "M4"), 2, 1)) in
+ defm "" : LMULWriteResMX<"WriteVIdxV", [SiFiveP600VectorArith], mx, IsWorstCase>;
}
}
@@ -677,12 +986,14 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 2, ReleaseAtCycles = [LMulLat] in {
+ let ReleaseAtCycles = [LMulLat] in {
+ let Latency = SiFiveP600VSlideI<mx>.c in
defm "" : LMULWriteResMX<"WriteVSlideI", [SiFiveP600VEXQ0], mx, IsWorstCase>;
- }
- let Latency = 1, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFiveP600VEXQ0], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFiveP600VEXQ0], mx, IsWorstCase>;
+
+ let Latency = SiFiveP600VSlide1<mx>.c in {
+ defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFiveP600VEXQ0], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFiveP600VEXQ0], mx, IsWorstCase>;
+ }
}
}
foreach mx = ["MF8", "MF4", "MF2", "M1"] in {
@@ -697,19 +1008,19 @@ foreach mx = ["MF8", "MF4", "MF2", "M1"] in {
foreach mx = ["M8", "M4", "M2"] in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = !add(4, LMulLat), ReleaseAtCycles = [LMulLat] in {
+ let Latency = SiFiveP600VSlideXComplex<mx>.latency in {
+ let ReleaseAtCycles = [SiFiveP600VSlideXComplex<mx, /*isUp=*/true>.cycles] in
defm "" : LMULWriteResMX<"WriteVSlideUpX", [SiFiveP600VEXQ1], mx, IsWorstCase>;
+ let ReleaseAtCycles = [SiFiveP600VSlideXComplex<mx, /*isUp=*/false>.cycles] in
defm "" : LMULWriteResMX<"WriteVSlideDownX", [SiFiveP600VEXQ1], mx, IsWorstCase>;
}
}
-let Latency = 2, ReleaseAtCycles = [1] in {
- def : WriteRes<WriteVMovSX, [SiFiveP600VectorArith]>;
+let Latency = 2, ReleaseAtCycles = [2] in {
def : WriteRes<WriteVMovXS, [SiFiveP600VectorArith]>;
-}
-let Latency = 6, ReleaseAtCycles = [1] in {
- def : WriteRes<WriteVMovSF, [SiFiveP600VectorArith]>;
+ def : WriteRes<WriteVMovSX, [SiFiveP600VectorArith]>;
def : WriteRes<WriteVMovFS, [SiFiveP600VectorArith]>;
+ def : WriteRes<WriteVMovSF, [SiFiveP600VectorArith]>;
}
// Simple Gather and Compress
@@ -735,7 +1046,7 @@ foreach mx = ["MF8", "MF4", "MF2", "M1"] in {
foreach mx = ["M2", "M4", "M8"] in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 6, ReleaseAtCycles = [LMulLat] in {
+ let Latency = 6, ReleaseAtCycles = [SiFiveP600VPermutationComplex<mx>.c] in {
defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFiveP600VEXQ1], mx, IsWorstCase>;
}
}
@@ -744,9 +1055,13 @@ foreach mx = ["M2", "M4", "M8"] in {
foreach sew = SchedSEWSet<mx>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- let Latency = 6, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
+ let Latency = 6 in {
+ let ReleaseAtCycles = [SiFiveP600VPermutationComplex<mx>.c] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
+ }
+
+ let ReleaseAtCycles = [!add(SiFiveP600VPermutationComplex<mx>.c, 1)] in
defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
}
}
@@ -766,55 +1081,84 @@ foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
// Zvbb
- let Latency = 2, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVBREVV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVCLZV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVCPOPV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVCTZV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVWSLLV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVWSLLX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVWSLLI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ let ReleaseAtCycles = [LMulLat] in {
+ let Latency = 2 in {
+ // FIXME: Exegesis was not able to measure the latency of these instructions.
+ // We probably should update them at some point.
+ defm "" : LMULWriteResMX<"WriteVCPOPV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVWSLLV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVWSLLX", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVWSLLI", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ }
+
+ let Latency = SiFiveP600VCryptoLatency<mx>.c in {
+ defm "" : LMULWriteResMX<"WriteVBREVV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVCLZV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVCTZV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+
+ def P600WriteVANDN_ # mx : SchedWriteRes<[SiFiveP600VectorCrypto]>;
+ }
}
+
+ // Special case for VANDN -- we execute it on the vector crypto unit.
+ defvar P600VANDNBaseSchedRW = [!cast<SchedWrite>("P600WriteVANDN_" # mx),
+ // VANDN always reads its merge (passthru) operand.
+ !cast<SchedRead>("ReadVPassthru_" # mx),
+ !cast<SchedRead>("ReadVIALUV_" # mx),
+ !cast<SchedRead>("ReadVIALUV_" # mx)];
+ def : InstRW<P600VANDNBaseSchedRW,
+ (instregex "^PseudoVANDN_V(V|X)_" # mx # "$")>;
+ def : InstRW<!listconcat(P600VANDNBaseSchedRW, [!cast<SchedRead>("ReadVMask")]),
+ (instregex "^PseudoVANDN_V(V|X)_" # mx # "_MASK$")>;
+
// Zvbc
- let Latency = 2, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVCLMULV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVCLMULX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ let Latency = SiFiveP600VCryptoLatency<mx>.c, ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULWriteResMX<"WriteVCLMULV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVCLMULX", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
}
// Zvkb
// VANDN uses WriteVIALU[V|X|I]
- let Latency = 2, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVBREV8V", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVREV8V", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVRotV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVRotX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVRotI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ let Latency = SiFiveP600VCryptoLatency<mx>.c, ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULWriteResMX<"WriteVBREV8V", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVREV8V", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVRotV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVRotX", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVRotI", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
}
// Zvkg
- let Latency = 2, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVGHSHV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVGMULV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ let Latency = SiFiveP600VCryptoLatency<mx>.c, ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULWriteResMX<"WriteVGHSHV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVGMULV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
}
// ZvknhaOrZvknhb
+ // FIXME: The latency is probably wrong.
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
- defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in {
+ let ReleaseAtCycles = [SiFiveP600VSHA2MSCycles<mx, sew>.c] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorCrypto], mx, sew, IsWorstCase>;
+ }
}
// Zvkned
- let Latency = 2, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVAESMVV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAESKF1V", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAESKF2V", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ let Latency = 2 in {
+ let ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULWriteResMX<"WriteVAESMVV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAESKF1V", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAESKF2V", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ }
+
+ let ReleaseAtCycles = [!if(!lt(LMulLat, 2), LMulLat, !div(LMulLat, 2))] in
+ defm "" : LMULWriteResMX<"WriteVAESZV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
}
- let Latency = 1, ReleaseAtCycles = [LMulLat] in
- defm "" : LMULWriteResMX<"WriteVAESZV", [SiFiveP600VectorArith], mx, IsWorstCase>;
// Zvksed
+ let Latency = 3, ReleaseAtCycles = [SiFiveP600VSM3CCycles<mx>.c] in
+ defm "" : LMULWriteResMX<"WriteVSM3CV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ let Latency = 6, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULWriteResMX<"WriteVSM3MEV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVSM4KV", [SiFiveP600VEXQ0], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSM4RV", [SiFiveP600VEXQ0], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSM3CV", [SiFiveP600VEXQ0], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSM3MEV", [SiFiveP600VEXQ0], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSM4KV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSM4RV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
}
}
@@ -1029,7 +1373,7 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
-defm "" : LMULSEWReadAdvance<"ReadVFRecpV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/mask.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/mask.s
new file mode 100644
index 00000000000000..adb45fceda3728
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/mask.s
@@ -0,0 +1,129 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=100 < %s | FileCheck %s
+
+vsetvli zero, zero, e32, m1, ta, ma
+
+vmslt.vv v0, v4, v20
+vmsle.vv v8, v4, v20
+vmsgt.vv v8, v20, v4
+vmsge.vv v8, v20, v4
+vmseq.vv v8, v4, v20
+vmsne.vv v8, v4, v20
+vmsltu.vv v8, v4, v20
+vmsleu.vv v8, v4, v20
+vmsgtu.vv v8, v20, v4
+vmsgeu.vv v8, v20, v4
+
+vmflt.vv v0, v4, v20
+vmfle.vv v8, v4, v20
+vmfgt.vv v8, v20, v4
+vmfge.vv v8, v20, v4
+vmfeq.vv v8, v4, v20
+vmfne.vv v8, v4, v20
+
+vmadc.vv v8, v4, v20
+vmsbc.vv v8, v4, v20
+
+vfirst.m a2, v4
+vpopc.m a2, v4
+
+viota.m v8, v4
+
+vmsbf.m v8, v4
+vmsif.m v8, v4
+vmsof.m v8, v4
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 2500
+# CHECK-NEXT: Total Cycles: 2605
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.96
+# CHECK-NEXT: IPC: 0.96
+# CHECK-NEXT: Block RThroughput: 26.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, ta, ma
+# CHECK-NEXT: 1 2 1.00 vmslt.vv v0, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmsle.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmslt.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmsle.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmseq.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmsne.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmsltu.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmsleu.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmsltu.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmsleu.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmflt.vv v0, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmfle.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmflt.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmfle.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmfeq.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmfne.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmadc.vv v8, v4, v20
+# CHECK-NEXT: 1 2 1.00 vmsbc.vv v8, v4, v20
+# CHECK-NEXT: 1 2 2.00 vfirst.m a2, v4
+# CHECK-NEXT: 1 2 2.00 vcpop.m a2, v4
+# CHECK-NEXT: 1 2 1.00 viota.m v8, v4
+# CHECK-NEXT: 1 2 1.00 vmsbf.m v8, v4
+# CHECK-NEXT: 1 2 1.00 vmsif.m v8, v4
+# CHECK-NEXT: 1 2 1.00 vmsof.m v8, v4
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SiFiveP600Div
+# CHECK-NEXT: [1] - SiFiveP600FEXQ0
+# CHECK-NEXT: [2] - SiFiveP600FEXQ1
+# CHECK-NEXT: [3] - SiFiveP600FloatDiv
+# CHECK-NEXT: [4] - SiFiveP600IEXQ0
+# CHECK-NEXT: [5] - SiFiveP600IEXQ1
+# CHECK-NEXT: [6] - SiFiveP600IEXQ2
+# CHECK-NEXT: [7] - SiFiveP600IEXQ3
+# CHECK-NEXT: [8.0] - SiFiveP600LDST
+# CHECK-NEXT: [8.1] - SiFiveP600LDST
+# CHECK-NEXT: [9] - SiFiveP600VDiv
+# CHECK-NEXT: [10] - SiFiveP600VEXQ0
+# CHECK-NEXT: [11] - SiFiveP600VEXQ1
+# CHECK-NEXT: [12] - SiFiveP600VFloatDiv
+# CHECK-NEXT: [13] - SiFiveP600VLD
+# CHECK-NEXT: [14] - SiFiveP600VST
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
+# CHECK-NEXT: - - - - 1.00 - - - - - - 26.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, ta, ma
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmslt.vv v0, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsle.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmslt.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsle.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmseq.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsne.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsltu.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsleu.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsltu.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsleu.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmflt.vv v0, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmfle.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmflt.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmfle.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmfeq.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmfne.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmadc.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsbc.vv v8, v4, v20
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vfirst.m a2, v4
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vcpop.m a2, v4
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - viota.m v8, v4
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsbf.m v8, v4
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsif.m v8, v4
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsof.m v8, v4
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s
new file mode 100644
index 00000000000000..3e9dcff4e1c0ac
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s
@@ -0,0 +1,816 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=1 < %s | FileCheck %s
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, mf4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, mf2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, m2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, m4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, m8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, mf8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, mf4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, mf2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, m2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, m4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e16, m8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, mf8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, mf4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, mf2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, m2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, m4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e32, m8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, mf8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, mf4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, mf2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, m2, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, m4, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e64, m8, tu, mu
+vmv1r.v v8, v16
+vsetvli zero, zero, e8, mf8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, mf4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, mf2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, m2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, m4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, m8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, mf8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, mf4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, mf2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, m2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, m4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e16, m8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, mf8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, mf4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, mf2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, m2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, m4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e32, m8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, mf8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, mf4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, mf2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, m2, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, m4, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e64, m8, tu, mu
+vmv2r.v v8, v16
+vsetvli zero, zero, e8, mf8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, mf4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, mf2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, m2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, m4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, m8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, mf8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, mf4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, mf2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, m2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, m4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e16, m8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, mf8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, mf4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, mf2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, m2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, m4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e32, m8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, mf8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, mf4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, mf2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, m2, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, m4, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e64, m8, tu, mu
+vmv4r.v v8, v16
+vsetvli zero, zero, e8, mf8, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, mf4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, mf2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, m2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, m4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e8, m8, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, mf8, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, mf4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, mf2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, m2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, m4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e16, m8, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, mf8, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, mf4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, mf2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, m2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, m4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e32, m8, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, mf8, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, mf4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, mf2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, m2, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, m4, tu, mu
+vmv8r.v v8, v16
+vsetvli zero, zero, e64, m8, tu, mu
+vmv8r.v v8, v16
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 256
+# CHECK-NEXT: Total Cycles: 237
+# CHECK-NEXT: Total uOps: 256
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.08
+# CHECK-NEXT: IPC: 1.08
+# CHECK-NEXT: Block RThroughput: 240.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu
+# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu
+# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu
+# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu
+# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SiFiveP600Div
+# CHECK-NEXT: [1] - SiFiveP600FEXQ0
+# CHECK-NEXT: [2] - SiFiveP600FEXQ1
+# CHECK-NEXT: [3] - SiFiveP600FloatDiv
+# CHECK-NEXT: [4] - SiFiveP600IEXQ0
+# CHECK-NEXT: [5] - SiFiveP600IEXQ1
+# CHECK-NEXT: [6] - SiFiveP600IEXQ2
+# CHECK-NEXT: [7] - SiFiveP600IEXQ3
+# CHECK-NEXT: [8.0] - SiFiveP600LDST
+# CHECK-NEXT: [8.1] - SiFiveP600LDST
+# CHECK-NEXT: [9] - SiFiveP600VDiv
+# CHECK-NEXT: [10] - SiFiveP600VEXQ0
+# CHECK-NEXT: [11] - SiFiveP600VEXQ1
+# CHECK-NEXT: [12] - SiFiveP600VFloatDiv
+# CHECK-NEXT: [13] - SiFiveP600VLD
+# CHECK-NEXT: [14] - SiFiveP600VST
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
+# CHECK-NEXT: - - - - 128.00 - - - - - - 240.00 240.00 - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s
index 61915375dd28b8..3726d821c41321 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s
@@ -151,13 +151,13 @@ vwsll.vi v8, v4, 8
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 133
-# CHECK-NEXT: Total Cycles: 166
+# CHECK-NEXT: Total Cycles: 330
# CHECK-NEXT: Total uOps: 133
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.80
-# CHECK-NEXT: IPC: 0.80
-# CHECK-NEXT: Block RThroughput: 164.0
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.40
+# CHECK-NEXT: Block RThroughput: 328.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -169,76 +169,59 @@ vwsll.vi v8, v4, 8
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12
-# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8
-# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8
+# CHECK-NEXT: 1 2 1.00 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vbrev.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vbrev8.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vrev8.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vclz.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vctz.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vcpop.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 2 1.00 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vwsll.vi v4, v8, 8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12
-# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8
-# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8
+# CHECK-NEXT: 1 2 1.00 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vbrev.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vbrev8.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vrev8.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vclz.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vctz.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vcpop.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 2 1.00 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vwsll.vi v4, v8, 8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12
-# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8
-# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8
+# CHECK-NEXT: 1 2 1.00 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vbrev.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vbrev8.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vrev8.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vclz.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vctz.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vcpop.v v4, v8
+# CHECK-NEXT: 1 2 1.00 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 2 1.00 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vwsll.vi v4, v8, 8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12
-# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8
-# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8
-# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 vandn.vv v4, v8, v12
-# CHECK-NEXT: 1 1 1.00 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 2 1.00 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vandn.vx v4, v8, a0
# CHECK-NEXT: 1 2 1.00 vbrev.v v4, v8
# CHECK-NEXT: 1 2 1.00 vbrev8.v v4, v8
# CHECK-NEXT: 1 2 1.00 vrev8.v v4, v8
@@ -253,9 +236,9 @@ vwsll.vi v8, v4, 8
# CHECK-NEXT: 1 2 1.00 vwsll.vv v4, v8, v12
# CHECK-NEXT: 1 2 1.00 vwsll.vx v4, v8, a0
# CHECK-NEXT: 1 2 1.00 vwsll.vi v4, v8, 8
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 2.00 vandn.vv v4, v8, v12
-# CHECK-NEXT: 1 1 2.00 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: 1 2 2.00 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 2 2.00 vandn.vx v4, v8, a0
# CHECK-NEXT: 1 2 2.00 vbrev.v v4, v8
# CHECK-NEXT: 1 2 2.00 vbrev8.v v4, v8
# CHECK-NEXT: 1 2 2.00 vrev8.v v4, v8
@@ -267,40 +250,57 @@ vwsll.vi v8, v4, 8
# CHECK-NEXT: 1 2 2.00 vror.vv v4, v8, v12
# CHECK-NEXT: 1 2 2.00 vror.vx v4, v8, a0
# CHECK-NEXT: 1 2 2.00 vror.vi v4, v8, 8
-# CHECK-NEXT: 1 2 2.00 vwsll.vv v8, v4, v12
-# CHECK-NEXT: 1 2 2.00 vwsll.vx v8, v4, a0
-# CHECK-NEXT: 1 2 2.00 vwsll.vi v8, v4, 8
+# CHECK-NEXT: 1 2 2.00 vwsll.vv v4, v8, v12
+# CHECK-NEXT: 1 2 2.00 vwsll.vx v4, v8, a0
+# CHECK-NEXT: 1 2 2.00 vwsll.vi v4, v8, 8
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 4 4.00 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 4 4.00 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 4 4.00 vbrev.v v4, v8
+# CHECK-NEXT: 1 4 4.00 vbrev8.v v4, v8
+# CHECK-NEXT: 1 4 4.00 vrev8.v v4, v8
+# CHECK-NEXT: 1 4 4.00 vclz.v v4, v8
+# CHECK-NEXT: 1 4 4.00 vctz.v v4, v8
+# CHECK-NEXT: 1 2 4.00 vcpop.v v4, v8
+# CHECK-NEXT: 1 4 4.00 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 4 4.00 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 4 4.00 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 4 4.00 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 4 4.00 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 2 4.00 vwsll.vv v8, v4, v12
+# CHECK-NEXT: 1 2 4.00 vwsll.vx v8, v4, a0
+# CHECK-NEXT: 1 2 4.00 vwsll.vi v8, v4, 8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 4.00 vandn.vv v8, v16, v24
-# CHECK-NEXT: 1 1 4.00 vandn.vx v8, v16, a0
-# CHECK-NEXT: 1 2 4.00 vbrev.v v8, v16
-# CHECK-NEXT: 1 2 4.00 vbrev8.v v8, v16
-# CHECK-NEXT: 1 2 4.00 vrev8.v v8, v16
-# CHECK-NEXT: 1 2 4.00 vclz.v v8, v16
-# CHECK-NEXT: 1 2 4.00 vctz.v v8, v16
-# CHECK-NEXT: 1 2 4.00 vcpop.v v8, v16
-# CHECK-NEXT: 1 2 4.00 vrol.vv v8, v16, v24
-# CHECK-NEXT: 1 2 4.00 vrol.vx v8, v16, a0
-# CHECK-NEXT: 1 2 4.00 vror.vv v8, v16, v24
-# CHECK-NEXT: 1 2 4.00 vror.vx v8, v16, a0
-# CHECK-NEXT: 1 2 4.00 vror.vi v8, v16, 8
+# CHECK-NEXT: 1 8 8.00 vandn.vv v8, v16, v24
+# CHECK-NEXT: 1 8 8.00 vandn.vx v8, v16, a0
+# CHECK-NEXT: 1 8 8.00 vbrev.v v8, v16
+# CHECK-NEXT: 1 8 8.00 vbrev8.v v8, v16
+# CHECK-NEXT: 1 8 8.00 vrev8.v v8, v16
+# CHECK-NEXT: 1 8 8.00 vclz.v v8, v16
+# CHECK-NEXT: 1 8 8.00 vctz.v v8, v16
+# CHECK-NEXT: 1 2 8.00 vcpop.v v8, v16
+# CHECK-NEXT: 1 8 8.00 vrol.vv v8, v16, v24
+# CHECK-NEXT: 1 8 8.00 vrol.vx v8, v16, a0
+# CHECK-NEXT: 1 8 8.00 vror.vv v8, v16, v24
+# CHECK-NEXT: 1 8 8.00 vror.vx v8, v16, a0
+# CHECK-NEXT: 1 8 8.00 vror.vi v8, v16, 8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 2.00 vandn.vv v4, v8, v12
-# CHECK-NEXT: 1 1 2.00 vandn.vx v4, v8, a0
-# CHECK-NEXT: 1 2 2.00 vbrev.v v4, v8
-# CHECK-NEXT: 1 2 2.00 vbrev8.v v4, v8
-# CHECK-NEXT: 1 2 2.00 vrev8.v v4, v8
-# CHECK-NEXT: 1 2 2.00 vclz.v v4, v8
-# CHECK-NEXT: 1 2 2.00 vctz.v v4, v8
-# CHECK-NEXT: 1 2 2.00 vcpop.v v4, v8
-# CHECK-NEXT: 1 2 2.00 vrol.vv v4, v8, v12
-# CHECK-NEXT: 1 2 2.00 vrol.vx v4, v8, a0
-# CHECK-NEXT: 1 2 2.00 vror.vv v4, v8, v12
-# CHECK-NEXT: 1 2 2.00 vror.vx v4, v8, a0
-# CHECK-NEXT: 1 2 2.00 vror.vi v4, v8, 8
-# CHECK-NEXT: 1 2 2.00 vwsll.vv v8, v4, v12
-# CHECK-NEXT: 1 2 2.00 vwsll.vx v8, v4, a0
-# CHECK-NEXT: 1 2 2.00 vwsll.vi v8, v4, 8
+# CHECK-NEXT: 1 4 4.00 vandn.vv v4, v8, v12
+# CHECK-NEXT: 1 4 4.00 vandn.vx v4, v8, a0
+# CHECK-NEXT: 1 4 4.00 vbrev.v v4, v8
+# CHECK-NEXT: 1 4 4.00 vbrev8.v v4, v8
+# CHECK-NEXT: 1 4 4.00 vrev8.v v4, v8
+# CHECK-NEXT: 1 4 4.00 vclz.v v4, v8
+# CHECK-NEXT: 1 4 4.00 vctz.v v4, v8
+# CHECK-NEXT: 1 2 4.00 vcpop.v v4, v8
+# CHECK-NEXT: 1 4 4.00 vrol.vv v4, v8, v12
+# CHECK-NEXT: 1 4 4.00 vrol.vx v4, v8, a0
+# CHECK-NEXT: 1 4 4.00 vror.vv v4, v8, v12
+# CHECK-NEXT: 1 4 4.00 vror.vx v4, v8, a0
+# CHECK-NEXT: 1 4 4.00 vror.vi v4, v8, 8
+# CHECK-NEXT: 1 2 4.00 vwsll.vv v8, v4, v12
+# CHECK-NEXT: 1 2 4.00 vwsll.vx v8, v4, a0
+# CHECK-NEXT: 1 2 4.00 vwsll.vi v8, v4, 8
# CHECK: Resources:
# CHECK-NEXT: [0] - SiFiveP600Div
@@ -322,140 +322,140 @@ vwsll.vi v8, v4, 8
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
-# CHECK-NEXT: - - - - 8.00 - - - - - - 164.00 164.00 - - -
+# CHECK-NEXT: - - - - 8.00 - - - - - - 328.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vandn.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vbrev.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev8.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrev8.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrev8.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclz.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vctz.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vctz.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vcpop.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrol.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vi v4, v8, 8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vwsll.vx v4, v8, a0
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vx v4, v8, a0
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vi v4, v8, 8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vandn.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vbrev.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev8.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrev8.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrev8.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclz.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vctz.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vctz.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vcpop.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrol.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vi v4, v8, 8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vwsll.vx v4, v8, a0
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vx v4, v8, a0
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vi v4, v8, 8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vandn.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vbrev.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev8.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrev8.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrev8.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclz.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vctz.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vctz.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vcpop.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrol.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vi v4, v8, 8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vwsll.vx v4, v8, a0
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vx v4, v8, a0
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vi v4, v8, 8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vandn.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vbrev.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev8.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrev8.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrev8.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclz.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vctz.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vctz.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vcpop.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrol.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vi v4, v8, 8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vwsll.vx v4, v8, a0
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vx v4, v8, a0
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vi v4, v8, 8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vandn.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vandn.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vandn.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vbrev.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vbrev.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vbrev8.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vrev8.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vrev8.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vclz.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vctz.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vctz.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vcpop.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vrol.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vrol.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vrol.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vror.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vror.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vror.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vror.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vror.vi v4, v8, 8
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vwsll.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vwsll.vx v4, v8, a0
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vwsll.vx v4, v8, a0
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vwsll.vi v4, v8, 8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vandn.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vandn.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vandn.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vbrev.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vbrev.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vbrev8.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vrev8.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrev8.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclz.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vctz.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vctz.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vcpop.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vrol.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrol.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrol.vx v4, v8, a0
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vror.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vror.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vx v4, v8, a0
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vi v4, v8, 8
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vv v8, v4, v12
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vwsll.vx v8, v4, a0
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vx v8, v4, a0
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vi v8, v4, 8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vandn.vv v8, v16, v24
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vandn.vx v8, v16, a0
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vandn.vx v8, v16, a0
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vbrev.v v8, v16
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vbrev8.v v8, v16
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vbrev8.v v8, v16
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vrev8.v v8, v16
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vclz.v v8, v16
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vclz.v v8, v16
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vctz.v v8, v16
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vcpop.v v8, v16
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vcpop.v v8, v16
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vrol.vv v8, v16, v24
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vrol.vx v8, v16, a0
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vrol.vx v8, v16, a0
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vror.vv v8, v16, v24
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vror.vx v8, v16, a0
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vror.vi v8, v16, 8
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vror.vx v8, v16, a0
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vror.vi v8, v16, 8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m4, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vandn.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vandn.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vbrev.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vbrev.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vbrev8.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vrev8.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrev8.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclz.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vctz.v v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vctz.v v4, v8
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vcpop.v v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vrol.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrol.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrol.vx v4, v8, a0
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vror.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vror.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vx v4, v8, a0
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vi v4, v8, 8
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vv v8, v4, v12
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vwsll.vx v8, v4, a0
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vx v8, v4, a0
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vi v8, v4, 8
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s
index faf75234ff3b78..d8f926d8a67930 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s
@@ -29,13 +29,13 @@ vclmulh.vx v8, v12, a0
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 20
-# CHECK-NEXT: Total Cycles: 28
+# CHECK-NEXT: Total Cycles: 64
# CHECK-NEXT: Total uOps: 20
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.71
-# CHECK-NEXT: IPC: 0.71
-# CHECK-NEXT: Block RThroughput: 30.0
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 60.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -47,25 +47,25 @@ vclmulh.vx v8, v12, a0
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 2 0.50 vclmul.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vclmul.vx v4, v8, a0
-# CHECK-NEXT: 1 2 0.50 vclmulh.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vclmulh.vx v4, v8, a0
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu
# CHECK-NEXT: 1 2 1.00 vclmul.vv v4, v8, v12
# CHECK-NEXT: 1 2 1.00 vclmul.vx v4, v8, a0
# CHECK-NEXT: 1 2 1.00 vclmulh.vv v4, v8, v12
# CHECK-NEXT: 1 2 1.00 vclmulh.vx v4, v8, a0
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu
# CHECK-NEXT: 1 2 2.00 vclmul.vv v4, v8, v12
# CHECK-NEXT: 1 2 2.00 vclmul.vx v4, v8, a0
# CHECK-NEXT: 1 2 2.00 vclmulh.vv v4, v8, v12
# CHECK-NEXT: 1 2 2.00 vclmulh.vx v4, v8, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: 1 4 4.00 vclmul.vv v4, v8, v12
+# CHECK-NEXT: 1 4 4.00 vclmul.vx v4, v8, a0
+# CHECK-NEXT: 1 4 4.00 vclmulh.vv v4, v8, v12
+# CHECK-NEXT: 1 4 4.00 vclmulh.vx v4, v8, a0
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 2 4.00 vclmul.vv v8, v12, v24
-# CHECK-NEXT: 1 2 4.00 vclmul.vx v8, v12, a0
-# CHECK-NEXT: 1 2 4.00 vclmulh.vv v8, v12, v24
-# CHECK-NEXT: 1 2 4.00 vclmulh.vx v8, v12, a0
+# CHECK-NEXT: 1 8 8.00 vclmul.vv v8, v12, v24
+# CHECK-NEXT: 1 8 8.00 vclmul.vx v8, v12, a0
+# CHECK-NEXT: 1 8 8.00 vclmulh.vv v8, v12, v24
+# CHECK-NEXT: 1 8 8.00 vclmulh.vx v8, v12, a0
# CHECK: Resources:
# CHECK-NEXT: [0] - SiFiveP600Div
@@ -87,27 +87,27 @@ vclmulh.vx v8, v12, a0
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
-# CHECK-NEXT: - - - - 4.00 - - - - - - 30.00 30.00 - - -
+# CHECK-NEXT: - - - - 4.00 - - - - - - 60.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vclmul.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclmul.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclmul.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vclmulh.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclmulh.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclmulh.vx v4, v8, a0
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vclmul.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vclmul.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vclmul.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vclmulh.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vclmulh.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vclmulh.vx v4, v8, a0
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vclmul.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclmul.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclmul.vx v4, v8, a0
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vclmulh.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclmulh.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclmulh.vx v4, v8, a0
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vclmul.vv v8, v12, v24
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vclmul.vv v8, v12, v24
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vclmul.vx v8, v12, a0
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vclmulh.vv v8, v12, v24
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vclmulh.vv v8, v12, v24
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vclmulh.vx v8, v12, a0
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s
index d974a077ab5a94..302b5fb05a08a5 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s
@@ -36,13 +36,13 @@ vgmul.vv v4, v8
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 24
-# CHECK-NEXT: Total Cycles: 45
+# CHECK-NEXT: Total Cycles: 78
# CHECK-NEXT: Total uOps: 24
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.53
-# CHECK-NEXT: IPC: 0.53
-# CHECK-NEXT: Block RThroughput: 36.0
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 72.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -54,29 +54,29 @@ vgmul.vv v4, v8
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 2 4.00 vghsh.vv v4, v8, v12
-# CHECK-NEXT: 1 2 4.00 vgmul.vv v4, v8
+# CHECK-NEXT: 1 8 8.00 vghsh.vv v4, v8, v12
+# CHECK-NEXT: 1 8 8.00 vgmul.vv v4, v8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 2 4.00 vghsh.vv v4, v8, v12
-# CHECK-NEXT: 1 2 4.00 vgmul.vv v4, v8
+# CHECK-NEXT: 1 8 8.00 vghsh.vv v4, v8, v12
+# CHECK-NEXT: 1 8 8.00 vgmul.vv v4, v8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 2 0.50 vghsh.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vgmul.vv v4, v8
+# CHECK-NEXT: 1 2 1.00 vghsh.vv v4, v8, v12
+# CHECK-NEXT: 1 2 1.00 vgmul.vv v4, v8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 2 0.50 vghsh.vv v4, v8, v12
-# CHECK-NEXT: 1 2 0.50 vgmul.vv v4, v8
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
# CHECK-NEXT: 1 2 1.00 vghsh.vv v4, v8, v12
# CHECK-NEXT: 1 2 1.00 vgmul.vv v4, v8
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
# CHECK-NEXT: 1 2 2.00 vghsh.vv v4, v8, v12
# CHECK-NEXT: 1 2 2.00 vgmul.vv v4, v8
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 4 4.00 vghsh.vv v4, v8, v12
+# CHECK-NEXT: 1 4 4.00 vgmul.vv v4, v8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 2 4.00 vghsh.vv v8, v16, v24
-# CHECK-NEXT: 1 2 4.00 vgmul.vv v8, v16
+# CHECK-NEXT: 1 8 8.00 vghsh.vv v8, v16, v24
+# CHECK-NEXT: 1 8 8.00 vgmul.vv v8, v16
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 2 2.00 vghsh.vv v4, v8, v12
-# CHECK-NEXT: 1 2 2.00 vgmul.vv v4, v8
+# CHECK-NEXT: 1 4 4.00 vghsh.vv v4, v8, v12
+# CHECK-NEXT: 1 4 4.00 vgmul.vv v4, v8
# CHECK: Resources:
# CHECK-NEXT: [0] - SiFiveP600Div
@@ -98,31 +98,31 @@ vgmul.vv v4, v8
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
-# CHECK-NEXT: - - - - 8.00 - - - - - - 36.00 36.00 - - -
+# CHECK-NEXT: - - - - 8.00 - - - - - - 72.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vghsh.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vghsh.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vgmul.vv v4, v8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vghsh.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vghsh.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vgmul.vv v4, v8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vghsh.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vghsh.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vgmul.vv v4, v8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vghsh.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vghsh.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vgmul.vv v4, v8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vghsh.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vghsh.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vgmul.vv v4, v8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vghsh.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vghsh.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vgmul.vv v4, v8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vghsh.vv v8, v16, v24
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vghsh.vv v8, v16, v24
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vgmul.vv v8, v16
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vghsh.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vghsh.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vgmul.vv v4, v8
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s
index a5c226e34452d1..13578eca0c5196 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s
@@ -60,13 +60,13 @@ vaesz.vs v8, v16
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 50
-# CHECK-NEXT: Total Cycles: 73
+# CHECK-NEXT: Total Cycles: 139
# CHECK-NEXT: Total uOps: 50
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.68
-# CHECK-NEXT: IPC: 0.68
-# CHECK-NEXT: Block RThroughput: 72.0
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 137.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -78,26 +78,16 @@ vaesz.vs v8, v16
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 2 0.50 vaesef.vv v4, v8
-# CHECK-NEXT: 1 2 0.50 vaesef.vs v4, v8
-# CHECK-NEXT: 1 2 0.50 vaesem.vv v4, v8
-# CHECK-NEXT: 1 2 0.50 vaesem.vs v4, v8
-# CHECK-NEXT: 1 2 0.50 vaesdm.vv v4, v8
-# CHECK-NEXT: 1 2 0.50 vaesdm.vs v4, v8
-# CHECK-NEXT: 1 2 0.50 vaeskf1.vi v4, v8, 8
-# CHECK-NEXT: 1 2 0.50 vaeskf2.vi v4, v8, 8
-# CHECK-NEXT: 1 1 0.50 vaesz.vs v4, v8
+# CHECK-NEXT: 1 2 1.00 vaesef.vv v4, v8
+# CHECK-NEXT: 1 2 1.00 vaesef.vs v4, v8
+# CHECK-NEXT: 1 2 1.00 vaesem.vv v4, v8
+# CHECK-NEXT: 1 2 1.00 vaesem.vs v4, v8
+# CHECK-NEXT: 1 2 1.00 vaesdm.vv v4, v8
+# CHECK-NEXT: 1 2 1.00 vaesdm.vs v4, v8
+# CHECK-NEXT: 1 2 1.00 vaeskf1.vi v4, v8, 8
+# CHECK-NEXT: 1 2 1.00 vaeskf2.vi v4, v8, 8
+# CHECK-NEXT: 1 2 1.00 vaesz.vs v4, v8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 2 0.50 vaesef.vv v4, v8
-# CHECK-NEXT: 1 2 0.50 vaesef.vs v4, v8
-# CHECK-NEXT: 1 2 0.50 vaesem.vv v4, v8
-# CHECK-NEXT: 1 2 0.50 vaesem.vs v4, v8
-# CHECK-NEXT: 1 2 0.50 vaesdm.vv v4, v8
-# CHECK-NEXT: 1 2 0.50 vaesdm.vs v4, v8
-# CHECK-NEXT: 1 2 0.50 vaeskf1.vi v4, v8, 8
-# CHECK-NEXT: 1 2 0.50 vaeskf2.vi v4, v8, 8
-# CHECK-NEXT: 1 1 0.50 vaesz.vs v4, v8
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
# CHECK-NEXT: 1 2 1.00 vaesef.vv v4, v8
# CHECK-NEXT: 1 2 1.00 vaesef.vs v4, v8
# CHECK-NEXT: 1 2 1.00 vaesem.vv v4, v8
@@ -106,8 +96,8 @@ vaesz.vs v8, v16
# CHECK-NEXT: 1 2 1.00 vaesdm.vs v4, v8
# CHECK-NEXT: 1 2 1.00 vaeskf1.vi v4, v8, 8
# CHECK-NEXT: 1 2 1.00 vaeskf2.vi v4, v8, 8
-# CHECK-NEXT: 1 1 1.00 vaesz.vs v4, v8
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 2 1.00 vaesz.vs v4, v8
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
# CHECK-NEXT: 1 2 2.00 vaesef.vv v4, v8
# CHECK-NEXT: 1 2 2.00 vaesef.vs v4, v8
# CHECK-NEXT: 1 2 2.00 vaesem.vv v4, v8
@@ -116,17 +106,27 @@ vaesz.vs v8, v16
# CHECK-NEXT: 1 2 2.00 vaesdm.vs v4, v8
# CHECK-NEXT: 1 2 2.00 vaeskf1.vi v4, v8, 8
# CHECK-NEXT: 1 2 2.00 vaeskf2.vi v4, v8, 8
-# CHECK-NEXT: 1 1 2.00 vaesz.vs v4, v8
+# CHECK-NEXT: 1 2 1.00 vaesz.vs v4, v8
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 2 4.00 vaesef.vv v4, v8
+# CHECK-NEXT: 1 2 4.00 vaesef.vs v4, v8
+# CHECK-NEXT: 1 2 4.00 vaesem.vv v4, v8
+# CHECK-NEXT: 1 2 4.00 vaesem.vs v4, v8
+# CHECK-NEXT: 1 2 4.00 vaesdm.vv v4, v8
+# CHECK-NEXT: 1 2 4.00 vaesdm.vs v4, v8
+# CHECK-NEXT: 1 2 4.00 vaeskf1.vi v4, v8, 8
+# CHECK-NEXT: 1 2 4.00 vaeskf2.vi v4, v8, 8
+# CHECK-NEXT: 1 2 2.00 vaesz.vs v4, v8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 2 4.00 vaesef.vv v8, v16
-# CHECK-NEXT: 1 2 4.00 vaesef.vs v8, v16
-# CHECK-NEXT: 1 2 4.00 vaesem.vv v8, v16
-# CHECK-NEXT: 1 2 4.00 vaesem.vs v8, v16
-# CHECK-NEXT: 1 2 4.00 vaesdm.vv v8, v16
-# CHECK-NEXT: 1 2 4.00 vaesdm.vs v8, v16
-# CHECK-NEXT: 1 2 4.00 vaeskf1.vi v8, v16, 8
-# CHECK-NEXT: 1 2 4.00 vaeskf2.vi v8, v16, 8
-# CHECK-NEXT: 1 1 4.00 vaesz.vs v8, v16
+# CHECK-NEXT: 1 2 8.00 vaesef.vv v8, v16
+# CHECK-NEXT: 1 2 8.00 vaesef.vs v8, v16
+# CHECK-NEXT: 1 2 8.00 vaesem.vv v8, v16
+# CHECK-NEXT: 1 2 8.00 vaesem.vs v8, v16
+# CHECK-NEXT: 1 2 8.00 vaesdm.vv v8, v16
+# CHECK-NEXT: 1 2 8.00 vaesdm.vs v8, v16
+# CHECK-NEXT: 1 2 8.00 vaeskf1.vi v8, v16, 8
+# CHECK-NEXT: 1 2 8.00 vaeskf2.vi v8, v16, 8
+# CHECK-NEXT: 1 2 4.00 vaesz.vs v8, v16
# CHECK: Resources:
# CHECK-NEXT: [0] - SiFiveP600Div
@@ -148,40 +148,40 @@ vaesz.vs v8, v16
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
-# CHECK-NEXT: - - - - 5.00 - - - - - - 71.00 73.00 - - -
+# CHECK-NEXT: - - - - 5.00 - - - - - - 137.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesef.vv v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesef.vs v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesef.vv v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesef.vs v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vv v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vs v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesdm.vv v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesdm.vs v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesdm.vv v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesdm.vs v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf1.vi v4, v8, 8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf2.vi v4, v8, 8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesz.vs v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesz.vs v4, v8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesef.vv v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesef.vs v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesem.vv v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vv v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vs v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesdm.vv v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesdm.vv v4, v8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesdm.vs v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaeskf1.vi v4, v8, 8
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaeskf2.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf1.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf2.vi v4, v8, 8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesz.vs v4, v8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesef.vv v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesef.vs v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesef.vs v4, v8
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesem.vv v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesem.vs v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesdm.vv v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesdm.vs v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaeskf1.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesem.vs v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesdm.vv v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesdm.vs v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaeskf1.vi v4, v8, 8
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaeskf2.vi v4, v8, 8
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesz.vs v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesz.vs v4, v8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesef.vv v4, v8
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesef.vs v4, v8
@@ -189,16 +189,16 @@ vaesz.vs v8, v16
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesem.vs v4, v8
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesdm.vv v4, v8
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesdm.vs v4, v8
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaeskf1.vi v4, v8, 8
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaeskf2.vi v4, v8, 8
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaesz.vs v4, v8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaeskf1.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaeskf2.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesz.vs v4, v8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaesef.vv v8, v16
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesef.vv v8, v16
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesef.vs v8, v16
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesem.vv v8, v16
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesem.vs v8, v16
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesdm.vv v8, v16
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaesdm.vs v8, v16
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaeskf1.vi v8, v16, 8
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaeskf2.vi v8, v16, 8
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaesz.vs v8, v16
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesdm.vs v8, v16
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaeskf1.vi v8, v16, 8
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaeskf2.vi v8, v16, 8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesz.vs v8, v16
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s
index f1a2a1899f0cf7..20ac87a724af16 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s
@@ -45,13 +45,13 @@ vsha2cl.vv v8, v16, v24
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 32
-# CHECK-NEXT: Total Cycles: 68
+# CHECK-NEXT: Total Cycles: 108
# CHECK-NEXT: Total uOps: 32
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.47
-# CHECK-NEXT: IPC: 0.47
-# CHECK-NEXT: Block RThroughput: 45.0
+# CHECK-NEXT: uOps Per Cycle: 0.30
+# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: Block RThroughput: 97.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -63,37 +63,37 @@ vsha2cl.vv v8, v16, v24
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 3 0.50 vsha2ms.vv v4, v8, v12
-# CHECK-NEXT: 1 3 0.50 vsha2ch.vv v4, v8, v12
-# CHECK-NEXT: 1 3 0.50 vsha2cl.vv v4, v8, v12
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
# CHECK-NEXT: 1 3 1.00 vsha2ms.vv v4, v8, v12
# CHECK-NEXT: 1 3 1.00 vsha2ch.vv v4, v8, v12
# CHECK-NEXT: 1 3 1.00 vsha2cl.vv v4, v8, v12
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
# CHECK-NEXT: 1 3 2.00 vsha2ms.vv v4, v8, v12
# CHECK-NEXT: 1 3 2.00 vsha2ch.vv v4, v8, v12
# CHECK-NEXT: 1 3 2.00 vsha2cl.vv v4, v8, v12
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 3 4.00 vsha2ms.vv v4, v8, v12
+# CHECK-NEXT: 1 3 4.00 vsha2ch.vv v4, v8, v12
+# CHECK-NEXT: 1 3 4.00 vsha2cl.vv v4, v8, v12
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 3 4.00 vsha2ms.vv v8, v16, v24
-# CHECK-NEXT: 1 3 4.00 vsha2ch.vv v8, v16, v24
-# CHECK-NEXT: 1 3 4.00 vsha2cl.vv v8, v16, v24
+# CHECK-NEXT: 1 3 8.00 vsha2ms.vv v8, v16, v24
+# CHECK-NEXT: 1 3 8.00 vsha2ch.vv v8, v16, v24
+# CHECK-NEXT: 1 3 8.00 vsha2cl.vv v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 3 0.50 vsha2ms.vv v4, v8, v12
-# CHECK-NEXT: 1 3 0.50 vsha2ch.vv v4, v8, v12
-# CHECK-NEXT: 1 3 0.50 vsha2cl.vv v4, v8, v12
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu
# CHECK-NEXT: 1 3 1.00 vsha2ms.vv v4, v8, v12
# CHECK-NEXT: 1 3 1.00 vsha2ch.vv v4, v8, v12
# CHECK-NEXT: 1 3 1.00 vsha2cl.vv v4, v8, v12
-# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 3 2.00 vsha2ms.vv v4, v8, v12
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: 1 3 3.00 vsha2ms.vv v4, v8, v12
# CHECK-NEXT: 1 3 2.00 vsha2ch.vv v4, v8, v12
# CHECK-NEXT: 1 3 2.00 vsha2cl.vv v4, v8, v12
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: 1 3 6.00 vsha2ms.vv v4, v8, v12
+# CHECK-NEXT: 1 3 4.00 vsha2ch.vv v4, v8, v12
+# CHECK-NEXT: 1 3 4.00 vsha2cl.vv v4, v8, v12
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 3 4.00 vsha2ms.vv v8, v16, v24
-# CHECK-NEXT: 1 3 4.00 vsha2ch.vv v8, v16, v24
-# CHECK-NEXT: 1 3 4.00 vsha2cl.vv v8, v16, v24
+# CHECK-NEXT: 1 3 12.00 vsha2ms.vv v8, v16, v24
+# CHECK-NEXT: 1 3 8.00 vsha2ch.vv v8, v16, v24
+# CHECK-NEXT: 1 3 8.00 vsha2cl.vv v8, v16, v24
# CHECK: Resources:
# CHECK-NEXT: [0] - SiFiveP600Div
@@ -115,39 +115,39 @@ vsha2cl.vv v8, v16, v24
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
-# CHECK-NEXT: - - - - 8.00 - - - - - - 44.00 46.00 - - -
+# CHECK-NEXT: - - - - 8.00 - - - - - - 97.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsha2ms.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ms.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ch.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2cl.vv v4, v8, v12
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2ms.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2ms.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2ch.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2cl.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2cl.vv v4, v8, v12
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2ms.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2ch.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2cl.vv v4, v8, v12
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2ms.vv v8, v16, v24
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2ms.vv v8, v16, v24
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2ch.vv v8, v16, v24
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2cl.vv v8, v16, v24
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2cl.vv v8, v16, v24
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ms.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsha2ch.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ch.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2cl.vv v4, v8, v12
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2ms.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - vsha2ms.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2ch.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2cl.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2cl.vv v4, v8, v12
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2ms.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vsha2ch.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 6.00 - - - - vsha2ms.vv v4, v8, v12
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2ch.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2cl.vv v4, v8, v12
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2ms.vv v8, v16, v24
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2ch.vv v8, v16, v24
-# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2cl.vv v8, v16, v24
+# CHECK-NEXT: - - - - - - - - - - - 12.00 - - - - vsha2ms.vv v8, v16, v24
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2ch.vv v8, v16, v24
+# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2cl.vv v8, v16, v24
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s
index 574bbb36c23f3c..7d11f467e1868a 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s
@@ -25,13 +25,13 @@ vsm3c.vi v8, v16, 8
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 15
-# CHECK-NEXT: Total Cycles: 31
+# CHECK-NEXT: Total Cycles: 28
# CHECK-NEXT: Total uOps: 15
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.48
-# CHECK-NEXT: IPC: 0.48
-# CHECK-NEXT: Block RThroughput: 32.0
+# CHECK-NEXT: uOps Per Cycle: 0.54
+# CHECK-NEXT: IPC: 0.54
+# CHECK-NEXT: Block RThroughput: 25.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -43,20 +43,20 @@ vsm3c.vi v8, v16, 8
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 3 1.00 vsm3me.vv v4, v8, v12
+# CHECK-NEXT: 1 6 1.00 vsm3me.vv v4, v8, v12
# CHECK-NEXT: 1 3 1.00 vsm3c.vi v4, v8, 8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 3 1.00 vsm3me.vv v4, v8, v12
+# CHECK-NEXT: 1 6 1.00 vsm3me.vv v4, v8, v12
# CHECK-NEXT: 1 3 1.00 vsm3c.vi v4, v8, 8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 3 2.00 vsm3me.vv v4, v8, v12
-# CHECK-NEXT: 1 3 2.00 vsm3c.vi v4, v8, 8
+# CHECK-NEXT: 1 6 2.00 vsm3me.vv v4, v8, v12
+# CHECK-NEXT: 1 3 1.00 vsm3c.vi v4, v8, 8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 3 4.00 vsm3me.vv v4, v8, v12
-# CHECK-NEXT: 1 3 4.00 vsm3c.vi v4, v8, 8
+# CHECK-NEXT: 1 6 4.00 vsm3me.vv v4, v8, v12
+# CHECK-NEXT: 1 3 2.00 vsm3c.vi v4, v8, 8
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 3 8.00 vsm3me.vv v8, v16, v24
-# CHECK-NEXT: 1 3 8.00 vsm3c.vi v8, v16, 8
+# CHECK-NEXT: 1 6 8.00 vsm3me.vv v8, v16, v24
+# CHECK-NEXT: 1 3 4.00 vsm3c.vi v8, v16, 8
# CHECK: Resources:
# CHECK-NEXT: [0] - SiFiveP600Div
@@ -78,7 +78,7 @@ vsm3c.vi v8, v16, 8
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
-# CHECK-NEXT: - - - - 5.00 - - - - - - 32.00 - - - -
+# CHECK-NEXT: - - - - 5.00 - - - - - - 25.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
@@ -90,10 +90,10 @@ vsm3c.vi v8, v16, 8
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm3c.vi v4, v8, 8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsm3me.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsm3c.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm3c.vi v4, v8, 8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsm3me.vv v4, v8, v12
-# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsm3c.vi v4, v8, 8
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsm3c.vi v4, v8, 8
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsm3me.vv v8, v16, v24
-# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsm3c.vi v8, v16, 8
+# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsm3c.vi v8, v16, 8
More information about the llvm-commits
mailing list