[llvm] [RISCV] LMUL lists for indexed and strided loads (PR #169756)
Petr Penzin via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 26 19:25:09 PST 2025
https://github.com/ppenzin created https://github.com/llvm/llvm-project/pull/169756
Create additional lists representing the valid LMULs for strided and indexed loads of particular element sizes.
>From 7d85a0310bbe99464c7bd1e3bcd05da09e66d496 Mon Sep 17 00:00:00 2001
From: Petr Penzin <penzin.dev at gmail.com>
Date: Wed, 26 Nov 2025 19:15:39 -0800
Subject: [PATCH] [RISCV] LMUL lists for indexed and strided loads
Create additional lists representing the valid LMULs for strided and indexed
loads of particular element sizes.
---
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 12 +++++-------
llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 8 ++++----
llvm/lib/Target/RISCV/RISCVScheduleV.td | 8 ++++++++
3 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 36a2f46416674..21d882ee8f426 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -562,7 +562,7 @@ multiclass SiFive7WriteResBase<int VLEN,
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.
- foreach mx = SchedMxList in {
+ foreach mx = SchedMxListDS8 in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -582,10 +582,8 @@ multiclass SiFive7WriteResBase<int VLEN,
defm : LMULWriteResMX<"WriteVSTOX8", [VCQ, VS], mx, IsWorstCase>;
}
}
- // TODO: The MxLists need to be filtered by EEW. We only need to support
- // LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
- // since LMUL >= 16/64.
- foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+
+ foreach mx = SchedMxListDS16 in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -605,7 +603,7 @@ multiclass SiFive7WriteResBase<int VLEN,
defm : LMULWriteResMX<"WriteVSTOX16", [VCQ, VS], mx, IsWorstCase>;
}
}
- foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+ foreach mx = SchedMxListDS32 in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -625,7 +623,7 @@ multiclass SiFive7WriteResBase<int VLEN,
defm : LMULWriteResMX<"WriteVSTOX32", [VCQ, VS], mx, IsWorstCase>;
}
}
- foreach mx = ["M1", "M2", "M4", "M8"] in {
+ foreach mx = SchedMxListDS64 in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
index a22552de71360..02c1b5fcf6462 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
@@ -437,7 +437,7 @@ foreach mx = SchedMxList in {
defm "" : LMULWriteResMX<"WriteVSTM", [AscalonLS], mx, IsWorstCase>;
}
-foreach mx = SchedMxList in {
+foreach mx = SchedMxListDS8 in {
defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
let Latency = Cycles in {
@@ -449,7 +449,7 @@ foreach mx = SchedMxList in {
defm "" : LMULWriteResMX<"WriteVSTOX8", [AscalonLS], mx, IsWorstCase>;
}
}
-foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListDS16 in {
defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
let Latency = Cycles in {
@@ -461,7 +461,7 @@ foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
defm "" : LMULWriteResMX<"WriteVSTOX16", [AscalonLS], mx, IsWorstCase>;
}
}
-foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListDS32 in {
defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
let Latency = Cycles in {
@@ -473,7 +473,7 @@ foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
defm "" : LMULWriteResMX<"WriteVSTOX32", [AscalonLS], mx, IsWorstCase>;
}
}
-foreach mx = ["M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListDS64 in {
defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
let Latency = Cycles in {
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index d11b446920c4e..f0831e278332a 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -19,6 +19,14 @@ defvar SchedMxListFW = !listremove(SchedMxList, ["M8", "MF8"]);
defvar SchedMxListF = !listremove(SchedMxList, ["MF8"]);
// Used for widening floating-point Reduction as it doesn't contain MF8.
defvar SchedMxListFWRed = SchedMxListF;
+// Used for indexed and strided loads of 8-bit lanes; same as the full MX list.
+defvar SchedMxListDS8 = SchedMxList;
+// Used for indexed and strided loads of 16-bit lanes (no MF8).
+defvar SchedMxListDS16 = SchedMxListF;
+// Used for indexed and strided loads of 32-bit lanes (no MF8 or MF4).
+defvar SchedMxListDS32 = !listremove(SchedMxListDS16, ["MF4"]);
+// Used for indexed and strided loads of 64-bit lanes (no MF8, MF4, or MF2).
+defvar SchedMxListDS64 = !listremove(SchedMxListDS32, ["MF2"]);
class SchedSEWSet<string mx, bit isF = 0, bit isWidening = 0> {
assert !or(!not(isF), !ne(mx, "MF8")), "LMUL shouldn't be MF8 for floating-point";
More information about the llvm-commits
mailing list