[llvm] [RISCV] Update Andes45 vector load/stores scheduling info (PR #173806)
Jim Lin via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 30 23:22:53 PST 2025
================
@@ -372,58 +432,153 @@ def : WriteRes<WriteVSETIVLI, [Andes45CSR]>;
def : WriteRes<WriteVSETVL, [Andes45CSR]>;
// 7. Vector Loads and Stores
+
+// Unit-stride loads and stores
+
+// The latency for loads is (4+VLSU_MEM_LATENCY).
+// The throughput for loads and stores is (VLEN/VLSU_MEM_DW)*EMUL.
foreach mx = SchedMxList in {
+ defvar Cycles = Andes45GetCyclesLoadStore<mx>.c;
defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
- // Unit-stride loads and stores
- defm "" : LMULWriteResMX<"WriteVLDE", [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDFF", [Andes45VLSU], mx, IsWorstCase>;
+ let Latency = !add(4, VLSU_MEM_LATENCY), ReleaseAtCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVLDE", [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDFF", [Andes45VLSU], mx, IsWorstCase>;
+ }
+ let ReleaseAtCycles = [Cycles] in
defm "" : LMULWriteResMX<"WriteVSTE", [Andes45VLSU], mx, IsWorstCase>;
// Mask loads and stores
+ let Latency = !add(4, VLSU_MEM_LATENCY), ReleaseAtCycles = [Cycles] in
defm "" : LMULWriteResMX<"WriteVLDM", [Andes45VLSU], mx, IsWorstCase=!eq(mx, "M1")>;
+ let ReleaseAtCycles = [Cycles] in
defm "" : LMULWriteResMX<"WriteVSTM", [Andes45VLSU], mx, IsWorstCase=!eq(mx, "M1")>;
+}
+
+// Strided loads and stores.
+
+// Strided loads and stores operate at one element per cycle.
+// We use the SEW to compute the number of elements for throughput.
+// The latency for loads is (4+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is VL.
+foreach mx = SchedMxList in {
+ defvar Cycles = Andes45GetCyclesOnePerElement<mx, 8>.c;
+ defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
- // Strided and indexed loads and stores
foreach eew = [8, 16, 32, 64] in {
- defm "" : LMULWriteResMX<"WriteVLDS" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ let Latency = !add(4, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+ ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVLDS" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTS" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ let ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVSTS" # eew, [Andes45VLSU], mx, IsWorstCase>;
}
}
-// Segmented loads and stores
+// Indexed loads and stores
+
+// Indexed loads and stores operate at one element per cycle.
+// We use the SEW to compute the number of elements for throughput.
+// The latency for loads is (5+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is (VL+EMUL-1).
+foreach mx = SchedMxList in {
+ defvar Cycles = Andes45GetCyclesOnePerElement<mx, 8>.c;
+ defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+
+ foreach eew = [8, 16, 32, 64] in {
+ let Latency = !add(5, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+ ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ }
+
+ let ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+ defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ }
+ }
+}
+
+// Unit-Stride Segmented Loads and Stores
+
+// The latency for loads is (4+VLSU_MEM_LATENCY+EMUL*NFIELDS+2)
+// The throughput for loads and stores is (VLEN/VLSU_MEM_DW)*EMUL*NFIELDS.
+foreach mx = SchedMxList in {
+ foreach nf=2-8 in {
+ foreach eew = [8, 16, 32, 64] in {
+ defvar Cycles = Andes45GetCyclesLoadStore<mx>.c;
+ defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+ defvar Size = !mul(Andes45GetLMULValue<mx>.c, nf);
+
+ let Latency = !add(4, !add(VLSU_MEM_LATENCY, !add(Size, 2))),
+ ReleaseAtCycles = [!mul(Cycles, nf)] in {
+ defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ }
+ // TODO
----------------
tclin914 wrote:
Remove it. Thanks.
https://github.com/llvm/llvm-project/pull/173806
More information about the llvm-commits
mailing list