[llvm] [RISCV] Update SiFive7's scheduling models with their optimizations on permutation instructions (PR #160763)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 12:27:14 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Min-Yih Hsu (mshockwave)
<details>
<summary>Changes</summary>
In newer SiFive7 cores like X390, permutation instructions like vrgather.vv that operate on an LMUL smaller than a single DLEN can complete in a constant number of cycles. For slightly larger data that satisfies the constraint `log2(SEW/8) + log2(LMUL) <= log2(DLEN / 32)`, these instructions can also take a number of cycles proportional to the square of LMUL, rather than proportional to VL.
---
Patch is 79.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160763.diff
3 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVSchedSiFive7.td (+94-10)
- (added) llvm/test/tools/llvm-mca/RISCV/SiFive7/vgather-vcompress.s (+314)
- (modified) llvm/test/tools/llvm-mca/RISCV/SiFiveX390/vgather-vcompress.s (+134-134)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 2e14ae3af957e..84b3f6497c75c 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -169,6 +169,64 @@ class SiFive7GetOrderedReductionCycles<string mx, int sew, int VLEN> {
int c = !mul(6, VLUpperBound);
}
+class isSingleDLEN<string mx> {
+ bit c = !or(!eq(mx, "MF2"), !or(!eq(mx, "MF4"), !eq(mx, "MF8")));
+}
+
+class SiFive7GetCyclesVRGatherVV<string mx, int sew, int VLEN,
+ bit hasFastGather> {
+ // if (hasFastGather && isSingleDLEN(mx))
+ // c = 1;
+ // else if (hasFastGather && (log2(SEW/8) + log2(LMUL) <= log2(DLEN / 32))
+ // c = LMUL * 2 * ceil(vl * SEW / DLEN);
+ // else
+ // c = vl;
+
+ defvar y = !logtwo(!div(sew, 8));
+ defvar x = !cond(
+ !eq(mx, "M1") : y,
+ !eq(mx, "M2") : !add(y, 1),
+ !eq(mx, "M4") : !add(y, 2),
+ !eq(mx, "M8") : !add(y, 3),
+ // Give isSingleDLEN(mx) cases a garbage value to avoid build failures,
+ // even though x will go unused.
+ true : 1
+ );
+ // LMUL * 2 * ceil(vl * SEW / DLEN) = LMUL * 2 * ceil(2 * LMUL)
+ defvar z = !cond(
+ !eq(mx, "M1") : 4,
+ !eq(mx, "M2") : 16,
+ !eq(mx, "M4") : 64,
+ !eq(mx, "M8") : 256,
+ // Give isSingleDLEN(mx) cases a garbage value to avoid build failures,
+ // even though z will go unused.
+ true : 1
+ );
+ defvar VLUpperBound = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+ bit IsSingleDLEN = isSingleDLEN<mx>.c;
+
+ int c = !cond(
+ !and(hasFastGather, IsSingleDLEN) : 1,
+ !and(hasFastGather, !le(x, !logtwo(!div(VLEN, 64)))) : z,
+ true: VLUpperBound
+ );
+}
+
+class SiFive7GetCyclesVCompress<string mx, int sew, int VLEN,
+ bit hasFastGather> {
+
+ // if (hasFastGather && isSingleDLEN(mx))
+ // c = 1
+ // else
+ // c = vl
+ defvar VLUpperBound = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+ bit IsSingleDLEN = isSingleDLEN<mx>.c;
+
+ int c = !if(!and(hasFastGather, IsSingleDLEN),
+ 1,
+ VLUpperBound);
+}
+
class SiFive7GetSiFiveVFNRClipCycles<string mx, int VLEN> {
int latency = !cond(
!eq(mx, "MF8"): 7,
@@ -259,7 +317,8 @@ multiclass SiFive7WriteResBase<int VLEN,
ProcResourceKind VL, ProcResourceKind VS,
ProcResourceKind VCQ,
SiFive7FPLatencies fpLatencies,
- bit isFP64Throttled = false> {
+ bit isFP64Throttled = false,
+ bit hasFastGather = false> {
// Branching
let Latency = 3 in {
@@ -976,13 +1035,33 @@ multiclass SiFive7WriteResBase<int VLEN,
foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
- defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [VCQ, VA1], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [VCQ, VA1], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVCompressV", [VCQ, VA1], mx, sew, IsWorstCase>;
- }
+ defvar IsSingleDLEN = isSingleDLEN<mx>.c;
+
+ defvar GatherVVCycles =
+ SiFive7GetCyclesVRGatherVV<mx, sew, VLEN, hasFastGather>.c;
+ // 7 + DLEN/ SEW
+ defvar SlowGatherLat = !add(7, !div(!div(VLEN, 2), sew));
+ defvar GatherVVLat = !if(hasFastGather,
+ !add(3, GatherVVCycles), SlowGatherLat);
+
+ let Latency = GatherVVLat, AcquireAtCycles = [0, 1],
+ ReleaseAtCycles = [1, !add(5, GatherVVCycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [VCQ, VA1], mx, sew, IsWorstCase>;
+
+ // VRGatherEI16VV is not improved by fastGather or fastLargeGather.
+ defvar GatherEI16VVCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+ let Latency = SlowGatherLat, AcquireAtCycles = [0, 1],
+ ReleaseAtCycles = [1, !add(5, GatherEI16VVCycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [VCQ, VA1], mx, sew, IsWorstCase>;
+
+ defvar CompressCycles = SiFive7GetCyclesVCompress<mx, sew, VLEN, hasFastGather>.c;
+ defvar CompressLat = !if(!and(hasFastGather, IsSingleDLEN),
+ 4,
+ !add(7, CompressCycles)); // 7 + VL
+ let Latency = CompressLat, AcquireAtCycles = [0, 1],
+ ReleaseAtCycles = [1, !add(8, CompressCycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [VCQ, VA1], mx, sew, IsWorstCase>;
}
}
@@ -1408,7 +1487,8 @@ multiclass SiFive7ReadAdvance {
/// eventually be supplied by different SchedMachineModels.
multiclass SiFive7SchedResources<int vlen, bit extraVALU,
SiFive7FPLatencies fpLatencies,
- bit isFP64Throttled> {
+ bit isFP64Throttled,
+ bit hasFastGather> {
defm SiFive7 : SiFive7ProcResources<extraVALU>;
// Pull out defs from SiFive7ProcResources so we can refer to them by name.
@@ -1435,7 +1515,8 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
: SiFive7WriteResBase<vlen, SiFive7PipeA, SiFive7PipeB, SiFive7PipeAB,
SiFive7IDiv, SiFive7FDiv, SiFive7VA1,
SiFive7VA1OrVA2, SiFive7VL, SiFive7VS,
- SiFive7VCQ, fpLatencies, isFP64Throttled>;
+ SiFive7VCQ, fpLatencies, isFP64Throttled,
+ hasFastGather>;
//===----------------------------------------------------------------------===//
// Bypass and advance
@@ -1468,6 +1549,7 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
SiFive7FPLatencies FPLatencies;
bit IsFP64Throttled = false;
+ bit HasFastGather = false;
string Name = !subst("Model", "", !subst("SiFive7", "", NAME));
}
@@ -1494,6 +1576,7 @@ def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
let HasExtraVALU = true;
let FPLatencies = SiFive7LowFPLatencies;
let IsFP64Throttled = true;
+ let HasFastGather = true;
}
/// Binding models to their scheduling resources.
@@ -1501,7 +1584,8 @@ foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
let SchedModel = model in
defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
model.FPLatencies,
- model.IsFP64Throttled>;
+ model.IsFP64Throttled,
+ model.HasFastGather>;
}
// Some model name aliases.
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/vgather-vcompress.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/vgather-vcompress.s
new file mode 100644
index 0000000000000..4ec1683a886dc
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/vgather-vcompress.s
@@ -0,0 +1,314 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -instruction-tables=full -iterations=1 < %s | FileCheck %s
+
+# The legal (SEW, LMUL) pairs are:
+# (e8, mf8) (e8, mf4) (e8, mf2) (e8, m1) (e8, m2) (e8, m4) (e8, m8)
+# (e16, mf4) (e16, mf2) (e16, m1) (e16, m2) (e16, m4) (e16, m8)
+# (e32, mf2) (e32, m1) (e32, m2) (e32, m4) (e32, m8)
+# (e64, m1) (e64, m2) (e64, m4) (e64, m8)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e8, mf4, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e8, mf2, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e8, m1, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e8, m2, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e8, m4, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e8, m8, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e16, mf4, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e16, mf2, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e16, m1, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e16, m2, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e16, m4, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e16, m8, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e32, mf2, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e32, m1, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e32, m2, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e32, m4, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e32, m8, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e64, m1, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e64, m2, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e64, m4, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+vsetvli zero, zero, e64, m8, tu, mu
+vrgather.vv v8, v16, v24
+vrgatherei16.vv v8, v16, v24
+vcompress.vm v8, v16, v24
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - VLEN512SiFive7FDiv:1
+# CHECK-NEXT: [1] - VLEN512SiFive7IDiv:1
+# CHECK-NEXT: [2] - VLEN512SiFive7PipeA:1
+# CHECK-NEXT: [3] - VLEN512SiFive7PipeAB:2 VLEN512SiFive7PipeA, VLEN512SiFive7PipeB
+# CHECK-NEXT: [4] - VLEN512SiFive7PipeB:1
+# CHECK-NEXT: [5] - VLEN512SiFive7VA:1
+# CHECK-NEXT: [6] - VLEN512SiFive7VCQ:1
+# CHECK-NEXT: [7] - VLEN512SiFive7VL:1
+# CHECK-NEXT: [8] - VLEN512SiFive7VS:1
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+# CHECK-NEXT: [7]: Bypass Latency
+# CHECK-NEXT: [8]: Resources (<Name> | <Name>[<ReleaseAtCycle>] | <Name>[<AcquireAtCycle>,<ReleaseAtCycle])
+# CHECK-NEXT: [9]: LLVM Opcode Name
+
+# CHECK: [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: 1 39 12.00 39 VLEN512SiFive7VA[1,13],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 39 12.00 39 VLEN512SiFive7VA[1,13],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 15 15.00 15 VLEN512SiFive7VA[1,16],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: 1 39 20.00 39 VLEN512SiFive7VA[1,21],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 39 20.00 39 VLEN512SiFive7VA[1,21],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 23 23.00 23 VLEN512SiFive7VA[1,24],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: 1 39 36.00 39 VLEN512SiFive7VA[1,37],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 39 36.00 39 VLEN512SiFive7VA[1,37],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 39 39.00 39 VLEN512SiFive7VA[1,40],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 39 68.00 39 VLEN512SiFive7VA[1,69],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 39 68.00 39 VLEN512SiFive7VA[1,69],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 71 71.00 71 VLEN512SiFive7VA[1,72],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: 1 39 132.00 39 VLEN512SiFive7VA[1,133],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 39 132.00 39 VLEN512SiFive7VA[1,133],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 135 135.00 135 VLEN512SiFive7VA[1,136],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: 1 39 260.00 39 VLEN512SiFive7VA[1,261],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 39 260.00 39 VLEN512SiFive7VA[1,261],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 263 263.00 263 VLEN512SiFive7VA[1,264],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: 1 39 516.00 39 VLEN512SiFive7VA[1,517],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 39 516.00 39 VLEN512SiFive7VA[1,517],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 519 519.00 519 VLEN512SiFive7VA[1,520],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: 1 23 12.00 23 VLEN512SiFive7VA[1,13],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 23 12.00 23 VLEN512SiFive7VA[1,13],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 15 15.00 15 VLEN512SiFive7VA[1,16],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: 1 23 20.00 23 VLEN512SiFive7VA[1,21],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 23 20.00 23 VLEN512SiFive7VA[1,21],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 23 23.00 23 VLEN512SiFive7VA[1,24],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 23 36.00 23 VLEN512SiFive7VA[1,37],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 23 36.00 23 VLEN512SiFive7VA[1,37],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 39 39.00 39 VLEN512SiFive7VA[1,40],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: 1 23 68.00 23 VLEN512SiFive7VA[1,69],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 23 68.00 23 VLEN512SiFive7VA[1,69],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 71 71.00 71 VLEN512SiFive7VA[1,72],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: 1 23 132.00 23 VLEN512SiFive7VA[1,133],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 23 132.00 23 VLEN512SiFive7VA[1,133],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 135 135.00 135 VLEN512SiFive7VA[1,136],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: 1 23 260.00 23 VLEN512SiFive7VA[1,261],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 23 260.00 23 VLEN512SiFive7VA[1,261],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 263 263.00 263 VLEN512SiFive7VA[1,264],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB VSETVLI vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: 1 15 12.00 15 VLEN512SiFive7VA[1,13],VLEN512SiFive7VCQ VRGATHER_VV vrgather.vv v8, v16, v24
+# CHECK-NEXT: 1 15 12.00 15 VLEN512SiFive7VA[1,13],VLEN512SiFive7VCQ VRGATHEREI16_VV vrgatherei16.vv v8, v16, v24
+# CHECK-NEXT: 1 15 15.00 15 VLEN512SiFive7VA[1,16],VLEN512SiFive7VCQ VCOMPRESS_VM vcompress.vm v8, v16, v24
+# CHECK-NEXT: 1 3 1.00 U 1 VLEN512SiFive7PipeA,VLEN512SiFive...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/160763
More information about the llvm-commits
mailing list