[llvm] [RISCV] LMUL lists for indexed and strided loads (PR #169756)

Wed Nov 26 19:25:09 PST 2025

https://github.com/ppenzin created https://github.com/llvm/llvm-project/pull/169756

Create additional lists representing valid LMULs for strided and indexed load of particular element sizes.

>From 7d85a0310bbe99464c7bd1e3bcd05da09e66d496 Mon Sep 17 00:00:00 2001
From: Petr Penzin <penzin.dev at gmail.com>
Date: Wed, 26 Nov 2025 19:15:39 -0800
Subject: [PATCH] [RISCV] LMUL lists for indexed and strided loads

Create additional lists representing valid LMULs for strided and indexed load
of particular element sizes.
---
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td     | 12 +++++-------
 llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td |  8 ++++----
 llvm/lib/Target/RISCV/RISCVScheduleV.td        |  8 ++++++++
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 36a2f46416674..21d882ee8f426 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -562,7 +562,7 @@ multiclass SiFive7WriteResBase<int VLEN,
   // resource, we do not need to use LMULSEWXXX constructors. However, we do
   // use the SEW from the name to determine the number of Cycles.
 
-  foreach mx = SchedMxList in {
+  foreach mx = SchedMxListDS8 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -582,10 +582,8 @@ multiclass SiFive7WriteResBase<int VLEN,
       defm : LMULWriteResMX<"WriteVSTOX8", [VCQ, VS], mx, IsWorstCase>;
     }
   }
-  // TODO: The MxLists need to be filtered by EEW. We only need to support
-  // LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
-  // since LMUL >= 16/64.
-  foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+
+  foreach mx = SchedMxListDS16 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -605,7 +603,7 @@ multiclass SiFive7WriteResBase<int VLEN,
       defm : LMULWriteResMX<"WriteVSTOX16", [VCQ, VS], mx, IsWorstCase>;
     }
   }
-  foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+  foreach mx = SchedMxListDS32 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -625,7 +623,7 @@ multiclass SiFive7WriteResBase<int VLEN,
       defm : LMULWriteResMX<"WriteVSTOX32", [VCQ, VS], mx, IsWorstCase>;
     }
   }
-  foreach mx = ["M1", "M2", "M4", "M8"] in {
+  foreach mx = SchedMxListDS64 in {
     defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
     defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, VLEN>.c;
     defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
index a22552de71360..02c1b5fcf6462 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
@@ -437,7 +437,7 @@ foreach mx = SchedMxList in {
   defm "" : LMULWriteResMX<"WriteVSTM",    [AscalonLS], mx, IsWorstCase>;
 }
 
-foreach mx = SchedMxList in {
+foreach mx = SchedMxListDS8 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {
@@ -449,7 +449,7 @@ foreach mx = SchedMxList in {
     defm "" : LMULWriteResMX<"WriteVSTOX8", [AscalonLS], mx, IsWorstCase>;
   }
 }
-foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListDS16 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {
@@ -461,7 +461,7 @@ foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
     defm "" : LMULWriteResMX<"WriteVSTOX16", [AscalonLS], mx, IsWorstCase>;
   }
 }
-foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListDS32 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {
@@ -473,7 +473,7 @@ foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
     defm "" : LMULWriteResMX<"WriteVSTOX32", [AscalonLS], mx, IsWorstCase>;
   }
 }
-foreach mx = ["M1", "M2", "M4", "M8"] in {
+foreach mx = SchedMxListDS64 in {
   defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
   defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
   let Latency = Cycles in {
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index d11b446920c4e..f0831e278332a 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -19,6 +19,14 @@ defvar SchedMxListFW = !listremove(SchedMxList, ["M8", "MF8"]);
 defvar SchedMxListF = !listremove(SchedMxList, ["MF8"]);
 // Used for widening floating-point Reduction as it doesn't contain MF8.
 defvar SchedMxListFWRed = SchedMxListF;
+// Used for indexed and strided loads of 8 bit lanes, same as full MX list
+defvar SchedMxListDS8 = SchedMxList;
+// Used for indexed and strided loads of 16 bit lanes
+defvar SchedMxListDS16 = SchedMxListF;
+// Used for indexed and strided loads of 32 bit lanes
+defvar SchedMxListDS32 = !listremove(SchedMxListDS16, ["MF4"]);
+// Used for indexed and strided loads of 64 bit lanes
+defvar SchedMxListDS64 = !listremove(SchedMxListDS32, ["MF2"]);
 
 class SchedSEWSet<string mx, bit isF = 0, bit isWidening = 0> {
   assert !or(!not(isF), !ne(mx, "MF8")), "LMUL shouldn't be MF8 for floating-point";