[llvm] [RISCV] Update Andes45 vector load/stores scheduling info (PR #173806)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 28 17:10:44 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Jim Lin (tclin914)
This PR adds latency/throughput information for all RVV loads and stores to the Andes45 series scheduling model.
---
Patch is 884.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/173806.diff
5 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVSchedAndes45.td (+180-25)
- (modified) llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vle-vse-vlm.s (+100-100)
- (modified) llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vlse-vsse.s (+89-89)
- (modified) llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vlseg-vsseg.s (+1401-1401)
- (modified) llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vlxe-vsxe.s (+177-177)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
index d5f523711100a..8e4b96a205edb 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
@@ -8,6 +8,27 @@
//===----------------------------------------------------------------------===//
+defvar Andes45VLEN = 512;
+defvar Andes45DLEN = 512;
+defvar Andes45VLEN_DLEN_RATIO = !div(Andes45VLEN, Andes45DLEN);
+
+assert !or(!eq(Andes45VLEN_DLEN_RATIO, 1), !eq(Andes45VLEN_DLEN_RATIO, 2)),
+ "Andes45VLEN / Andes45DLEN should be 1 or 2";
+
+defvar Andes45BIU_DATA_WIDTH = 512;
+defvar Andes45DLEN_BIU_DATA_WIDTH_RATIO = !div(Andes45DLEN, Andes45BIU_DATA_WIDTH);
+
+assert !or(!eq(Andes45DLEN_BIU_DATA_WIDTH_RATIO, 1), !eq(Andes45DLEN_BIU_DATA_WIDTH_RATIO, 2)),
+ "Andes45DLEN / Andes45DLEN_BIU_DATA_WIDTH_RATIO should be 1 or 2";
+
+// HVM region: VLSU_MEM_DW equals DLEN
+// Cacheable/Non-cacheable region: VLSU_MEM_DW equals BIU_DATA_WIDTH
+defvar Andes45VLSU_MEM_DW = Andes45BIU_DATA_WIDTH;
+defvar Andes45VLEN_VLSU_MEM_DW_RATIO = !div(Andes45VLEN, Andes45VLSU_MEM_DW);
+
+// The latency varies depending on the memory type and its status.
+defvar VLSU_MEM_LATENCY = 13;
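+
+// For illustration, assuming the default configuration above (VLEN = DLEN =
+// BIU_DATA_WIDTH = 512): all three ratios resolve to 1, Andes45VLSU_MEM_DW is
+// 512, and the unit-stride load latency defined below works out to
+// 4 + 13 = 17 cycles, the value the updated llvm-mca tests expect.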
+
// The worst case LMUL is the largest LMUL.
class Andes45IsWorstCaseMX<string mx, list<string> MxList> {
defvar LLMUL = LargestLMUL<MxList>.r;
@@ -22,6 +43,45 @@ class Andes45IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
}
+// (VLEN/VLSU_MEM_DW)*EMUL
+class Andes45GetCyclesLoadStore<string mx> {
+ int c = !cond(
+ !eq(mx, "M1") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1),
+ !eq(mx, "M2") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 2),
+ !eq(mx, "M4") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 4),
+ !eq(mx, "M8") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 8),
+ !eq(mx, "MF2") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1),
+ !eq(mx, "MF4") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1),
+ !eq(mx, "MF8") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1)
+ );
+}
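+
+// e.g. with Andes45VLEN_VLSU_MEM_DW_RATIO = 1, M8 occupies the VLSU for
+// 1 * 8 = 8 cycles, while every fractional LMUL takes a single cycle.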
+
+class Andes45GetCyclesOnePerElement<string mx, int sew> {
+ defvar VL = !div(Andes45VLEN, sew);
+ int c = !cond(
+ !eq(mx, "M1") : VL,
+ !eq(mx, "M2") : !mul(VL, 2),
+ !eq(mx, "M4") : !mul(VL, 4),
+ !eq(mx, "M8") : !mul(VL, 8),
+ !eq(mx, "MF2") : !div(VL, 2),
+ !eq(mx, "MF4") : !div(VL, 4),
+ !eq(mx, "MF8") : !div(VL, 8)
+ );
+}
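+
+// e.g. with VLEN = 512 and sew = 8, VL = 512 / 8 = 64, so M2 yields
+// 64 * 2 = 128 cycles and MF4 yields 64 / 4 = 16 cycles.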
+
+// When a fractional LMUL is used, the LMUL used in the calculation is 1.
+class Andes45GetLMULValue<string mx> {
+ int c = !cond(
+ !eq(mx, "M1") : 1,
+ !eq(mx, "M2") : 2,
+ !eq(mx, "M4") : 4,
+ !eq(mx, "M8") : 8,
+ !eq(mx, "MF2") : 1,
+ !eq(mx, "MF4") : 1,
+ !eq(mx, "MF8") : 1
+ );
+}
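+
+// e.g. M4 yields 4, while MF2, MF4, and MF8 all clamp to 1.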
+
def Andes45Model : SchedMachineModel {
let MicroOpBufferSize = 0; // Andes45 is an in-order processor
let IssueWidth = 2; // 2 micro-ops dispatched per cycle
@@ -372,58 +432,153 @@ def : WriteRes<WriteVSETIVLI, [Andes45CSR]>;
def : WriteRes<WriteVSETVL, [Andes45CSR]>;
// 7. Vector Loads and Stores
+
+// Unit-stride loads and stores
+
+// The latency for loads is (4+VLSU_MEM_LATENCY).
+// The throughput for loads and stores is (VLEN/VLSU_MEM_DW)*EMUL.
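+// e.g. a vle8.v at LMUL = 2: Latency = 4 + 13 = 17 and
+// ReleaseAtCycles = 1 * 2 = 2, shown by llvm-mca as a reciprocal throughput
+// of 2.00 on Andes45VLSU.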
foreach mx = SchedMxList in {
+ defvar Cycles = Andes45GetCyclesLoadStore<mx>.c;
defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
- // Unit-stride loads and stores
- defm "" : LMULWriteResMX<"WriteVLDE", [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDFF", [Andes45VLSU], mx, IsWorstCase>;
+ let Latency = !add(4, VLSU_MEM_LATENCY), ReleaseAtCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVLDE", [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDFF", [Andes45VLSU], mx, IsWorstCase>;
+ }
+ let ReleaseAtCycles = [Cycles] in
defm "" : LMULWriteResMX<"WriteVSTE", [Andes45VLSU], mx, IsWorstCase>;
// Mask loads and stores
+ let Latency = !add(4, VLSU_MEM_LATENCY), ReleaseAtCycles = [Cycles] in
defm "" : LMULWriteResMX<"WriteVLDM", [Andes45VLSU], mx, IsWorstCase=!eq(mx, "M1")>;
+ let ReleaseAtCycles = [Cycles] in
defm "" : LMULWriteResMX<"WriteVSTM", [Andes45VLSU], mx, IsWorstCase=!eq(mx, "M1")>;
+}
+
+// Strided loads and stores
+
+// Strided loads and stores operate at one element per cycle.
+// We use the SEW to compute the number of elements for the throughput.
+// The latency for loads is (4+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is VL.
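+// e.g. a strided load with EEW = 8: Latency = 4 + 13 + 512 / 8 = 81, and at
+// M1 it holds the VLSU for VL = 512 / 8 = 64 cycles.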
+foreach mx = SchedMxList in {
+ defvar Cycles = Andes45GetCyclesOnePerElement<mx, 8>.c;
+ defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
- // Strided and indexed loads and stores
foreach eew = [8, 16, 32, 64] in {
- defm "" : LMULWriteResMX<"WriteVLDS" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ let Latency = !add(4, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+ ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVLDS" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTS" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ let ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVSTS" # eew, [Andes45VLSU], mx, IsWorstCase>;
}
}
-// Segmented loads and stores
+// Indexed loads and stores
+
+// Indexed loads and stores operate at one element per cycle.
+// We use the SEW to compute the number of elements for the throughput.
+// The latency for loads is (5+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is (VL+EMUL-1).
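+// e.g. an indexed load with EEW = 64: Latency = 5 + 13 + 512 / 64 = 26, and
+// at M2 it holds the VLSU for 64 * 2 + 2 - 1 = 129 cycles.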
+foreach mx = SchedMxList in {
+ defvar Cycles = Andes45GetCyclesOnePerElement<mx, 8>.c;
+ defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+
+ foreach eew = [8, 16, 32, 64] in {
+ let Latency = !add(5, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+ ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ }
+
+ let ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+ defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ }
+ }
+}
+
+// Unit-stride segmented loads and stores
+
+// The latency for loads is (4+VLSU_MEM_LATENCY+EMUL*NFIELDS+2).
+// The throughput for loads and stores is (VLEN/VLSU_MEM_DW)*EMUL*NFIELDS.
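+// e.g. vlseg4e8.v at LMUL = 2: Latency = 4 + 13 + 2 * 4 + 2 = 27 and the
+// load holds the VLSU for (1 * 2) * 4 = 8 cycles.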
+foreach mx = SchedMxList in {
+ foreach nf=2-8 in {
+ foreach eew = [8, 16, 32, 64] in {
+ defvar Cycles = Andes45GetCyclesLoadStore<mx>.c;
+ defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+ defvar Size = !mul(Andes45GetLMULValue<mx>.c, nf);
+
+ let Latency = !add(4, !add(VLSU_MEM_LATENCY, !add(Size, 2))),
+ ReleaseAtCycles = [!mul(Cycles, nf)] in {
+ defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ }
+ // TODO
+ let ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ }
+ }
+}
+
+// Strided segmented loads and stores
+
+// The latency for loads is (5+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is VL.
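+// e.g. a strided segment load with EEW = 32: Latency = 5 + 13 + 512 / 32 = 34,
+// and at M1 it holds the VLSU for VL = 512 / 32 = 16 cycles.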
foreach mx = SchedMxList in {
foreach nf=2-8 in {
foreach eew = [8, 16, 32, 64] in {
+ defvar Cycles = Andes45GetCyclesOnePerElement<mx, eew>.c;
defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
- // Unit-stride segmented
- defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
+ let Latency = !add(5, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+ ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ let ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ }
+ }
+}
+
+// Indexed segmented loads and stores
- // Strided/indexed segmented
- defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
+// The latency for loads is (6+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is (VL+EMUL-1).
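+// e.g. an indexed segment load with EEW = 16: Latency = 6 + 13 + 512 / 16 = 51,
+// and at M4 it holds the VLSU for 32 * 4 + 4 - 1 = 131 cycles.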
+foreach mx = SchedMxList in {
+ foreach nf=2-8 in {
+ foreach eew = [8, 16, 32, 64] in {
+ defvar Cycles = Andes45GetCyclesOnePerElement<mx, eew>.c;
+ defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
- // Indexed segmented
- defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
+ let Latency = !add(6, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+ ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+ defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ }
+ let ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+ defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ }
}
}
}
// Whole register move/load/store
foreach LMul = [1, 2, 4, 8] in {
- def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [Andes45VLSU]>;
- def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [Andes45VLSU]>;
+ let Latency = 6, ReleaseAtCycles = [!mul(LMul, 2)] in
+ def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [Andes45VLSU]>;
+ let ReleaseAtCycles = [!mul(LMul, 2)] in
+ def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [Andes45VLSU]>;
def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [Andes45VPERMUT]>;
}
diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vle-vse-vlm.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vle-vse-vlm.s
index 375c615c0cc61..70f95ebe690f2 100644
--- a/llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vle-vse-vlm.s
+++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vle-vse-vlm.s
@@ -210,49 +210,49 @@ vle64ff.v v8, (a0)
# CHECK: [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, mf8, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, mf4, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, mf2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, m1, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, m2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 2.00 * 17 Andes45VLSU[2] VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, m4, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 4.00 * 17 Andes45VLSU[4] VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, m8, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 8.00 * 17 Andes45VLSU[8] VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, mf4, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, mf2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, m1, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, m2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 2.00 * 17 Andes45VLSU[2] VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, m4, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 4.00 * 17 Andes45VLSU[4] VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, m8, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 8.00 * 17 Andes45VLSU[8] VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e32, mf2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE32_V vle32.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE32_V vle32.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e32, m1, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE32_V vle32.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE32_V vle32.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e32, m2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE32_V vle32.v v8, (a0)
+# CHECK-NEXT: 1 17 2.00 * 17 Andes45VLSU[2] VLE32_V vle32.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e32, m4, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE32_V vle32.v v8, (a0)
+# CHECK-NEXT: 1 17 4.00 * 17 Andes45VLSU[4] VLE32_V vle32.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e32, m8, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE32_V vle32.v v8, (a0)
+# CHECK-NEXT: 1 17 8.00 * 17 Andes45VLSU[8] VLE32_V vle32.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e64, m1, ta, ma
-# CHECK-NEXT: 1 1 1.00 *...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/173806