[llvm] 093bc6b - [RISCV] SiFive7 VLDS Sched should not depend on VL when stride is x0. (#70266)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 30 12:47:48 PDT 2023
Author: Michael Maitland
Date: 2023-10-30T15:47:45-04:00
New Revision: 093bc6b61a6c87e138a4bf89fe620f6e63d20eda
URL: https://github.com/llvm/llvm-project/commit/093bc6b61a6c87e138a4bf89fe620f6e63d20eda
DIFF: https://github.com/llvm/llvm-project/commit/093bc6b61a6c87e138a4bf89fe620f6e63d20eda.diff
LOG: [RISCV] SiFive7 VLDS Sched should not depend on VL when stride is x0. (#70266)
When stride is x0, a strided load should behave like a unit stride load,
which uses the VLDE sched class.
---------
Co-authored-by: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Added:
llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s
Modified:
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
llvm/lib/Target/RISCV/RISCVScheduleV.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index d2447cf23e266c6..9da68dc9a139d32 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -455,11 +455,19 @@ foreach mx = SchedMxList in {
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.
+
+// This predicate is true when the rs2 operand of vlse or vsse is x0, false
+// otherwise.
+def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;
+
foreach mx = SchedMxList in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VL],
+ 4, [VLDSX0Cycles], !add(3, Cycles),
+ [Cycles], mx, IsWorstCase>;
let Latency = !add(3, Cycles), ReleaseAtCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS8", [SiFive7VL], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VL], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VL], mx, IsWorstCase>;
}
@@ -469,11 +477,17 @@ foreach mx = SchedMxList in {
defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VS], mx, IsWorstCase>;
}
}
-foreach mx = SchedMxList in {
+// TODO: The MxLists need to be filtered by EEW. We only need to support
+// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
+// since LMUL >= 16/64.
+foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VL],
+ 4, [VLDSX0Cycles], !add(3, Cycles),
+ [Cycles], mx, IsWorstCase>;
let Latency = !add(3, Cycles), ReleaseAtCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS16", [SiFive7VL], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VL], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VL], mx, IsWorstCase>;
}
@@ -483,11 +497,14 @@ foreach mx = SchedMxList in {
defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VS], mx, IsWorstCase>;
}
}
-foreach mx = SchedMxList in {
+foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VL],
+ 4, [VLDSX0Cycles], !add(3, Cycles),
+ [Cycles], mx, IsWorstCase>;
let Latency = !add(3, Cycles), ReleaseAtCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS32", [SiFive7VL], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VL], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VL], mx, IsWorstCase>;
}
@@ -497,11 +514,14 @@ foreach mx = SchedMxList in {
defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VS], mx, IsWorstCase>;
}
}
-foreach mx = SchedMxList in {
+foreach mx = ["M1", "M2", "M4", "M8"] in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VL],
+ 4, [VLDSX0Cycles], !add(3, Cycles),
+ [Cycles], mx, IsWorstCase>;
let Latency = !add(3, Cycles), ReleaseAtCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS64", [SiFive7VL], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VL], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VL], mx, IsWorstCase>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 7af7716c96b8564..b5ddb8197993bf2 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -62,6 +62,48 @@ multiclass LMULSEWWriteResMXSEW<string name, list<ProcResourceKind> resources,
def : WriteRes<!cast<SchedWrite>(name # "_WorstCase"), resources>;
}
+// Define a SchedAlias for the SchedWrite associated with (name, mx) whose
+// behavior is aliased to a Variant. The Variant has Latency predLat and
+// ReleaseAtCycles predCycles if the SchedPredicate Pred is true, otherwise has
+// Latency noPredLat and ReleaseAtCycles noPredCycles. The WorstCase SchedWrite
+// is created similarly if IsWorstCase is true.
+multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
+ list<ProcResourceKind> resources,
+ int predLat, list<int> predCycles,
+ int noPredLat, list<int> noPredCycles,
+ string mx, bit IsWorstCase> {
+ defvar nameMX = name # "_" # mx;
+
+ // Define the different behaviors
+ def NAME # nameMX # "_Pred" : SchedWriteRes<resources> {
+ let Latency = predLat;
+ let ReleaseAtCycles = predCycles;
+ }
+ def NAME # nameMX # "_NoPred" : SchedWriteRes<resources> {
+ let Latency = noPredLat;
+ let ReleaseAtCycles = noPredCycles;
+ }
+
+ // Tie behavior to predicate
+ def NAME # nameMX # "_Variant" : SchedWriteVariant<[
+ SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>,
+ SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX # "_NoPred")]>
+ ]>;
+ def : SchedAlias<
+ !cast<SchedReadWrite>(nameMX),
+ !cast<SchedReadWrite>(NAME # nameMX # "_Variant")>;
+
+ if IsWorstCase then {
+ def NAME # name # "_WorstCase_Variant" : SchedWriteVariant<[
+ SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>,
+ SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX # "_NoPred")]>
+ ]>;
+ def : SchedAlias<
+ !cast<SchedReadWrite>(name # "_WorstCase"),
+ !cast<SchedReadWrite>(NAME # name # "_WorstCase_Variant")>;
+ }
+}
+
// Define multiclasses to define SchedWrite, SchedRead, WriteRes, and
// ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the
// SchedMxList variants above. Each multiclass is responsible for defining
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s
new file mode 100644
index 000000000000000..8b52d0ece635931
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s
@@ -0,0 +1,125 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -debug -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 < %s | FileCheck %s
+
+vsetvli zero, zero, e32, m1, tu, mu
+
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vlse8.v v1, (a1), zero
+vlse16.v v1, (a1), zero
+vlse32.v v1, (a1), zero
+vlse64.v v1, (a1), zero
+
+vle8.v v1, (a1)
+vle16.v v1, (a1)
+vle32.v v1, (a1)
+vle64.v v1, (a1)
+
+vsetvli zero, zero, e64, m1, tu, mu
+
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vlse8.v v1, (a1), zero
+vlse16.v v1, (a1), zero
+vlse32.v v1, (a1), zero
+vlse64.v v1, (a1), zero
+
+vle8.v v1, (a1)
+vle16.v v1, (a1)
+vle32.v v1, (a1)
+vle64.v v1, (a1)
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 26
+# CHECK-NEXT: Total Cycles: 3523
+# CHECK-NEXT: Total uOps: 26
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.01
+# CHECK-NEXT: IPC: 0.01
+# CHECK-NEXT: Block RThroughput: 3517.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 515 512.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 259 256.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 19 16.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 67 64.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 515 512.00 * vlse8.v v1, (a1), zero
+# CHECK-NEXT: 1 259 256.00 * vlse16.v v1, (a1), zero
+# CHECK-NEXT: 1 19 16.00 * vlse32.v v1, (a1), zero
+# CHECK-NEXT: 1 67 64.00 * vlse64.v v1, (a1), zero
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a1)
+# CHECK-NEXT: 1 4 1.00 * vle16.v v1, (a1)
+# CHECK-NEXT: 1 4 2.00 * vle32.v v1, (a1)
+# CHECK-NEXT: 1 4 4.00 * vle64.v v1, (a1)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 515 512.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 259 256.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 131 128.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 11 8.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 515 512.00 * vlse8.v v1, (a1), zero
+# CHECK-NEXT: 1 259 256.00 * vlse16.v v1, (a1), zero
+# CHECK-NEXT: 1 131 128.00 * vlse32.v v1, (a1), zero
+# CHECK-NEXT: 1 11 8.00 * vlse64.v v1, (a1), zero
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a1)
+# CHECK-NEXT: 1 4 1.00 * vle16.v v1, (a1)
+# CHECK-NEXT: 1 4 1.00 * vle32.v v1, (a1)
+# CHECK-NEXT: 1 4 2.00 * vle64.v v1, (a1)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SiFive7FDiv
+# CHECK-NEXT: [1] - SiFive7IDiv
+# CHECK-NEXT: [2] - SiFive7PipeA
+# CHECK-NEXT: [3] - SiFive7PipeB
+# CHECK-NEXT: [4] - SiFive7PipeV
+# CHECK-NEXT: [5] - SiFive7VA
+# CHECK-NEXT: [6] - SiFive7VL
+# CHECK-NEXT: [7] - SiFive7VS
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
+# CHECK-NEXT: - - 2.00 - 3517.00 - 3517.00 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: - - - - 512.00 - 512.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - 256.00 - 256.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - 16.00 - 16.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - 64.00 - 64.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - - - 512.00 - 512.00 - vlse8.v v1, (a1), zero
+# CHECK-NEXT: - - - - 256.00 - 256.00 - vlse16.v v1, (a1), zero
+# CHECK-NEXT: - - - - 16.00 - 16.00 - vlse32.v v1, (a1), zero
+# CHECK-NEXT: - - - - 64.00 - 64.00 - vlse64.v v1, (a1), zero
+# CHECK-NEXT: - - - - 1.00 - 1.00 - vle8.v v1, (a1)
+# CHECK-NEXT: - - - - 1.00 - 1.00 - vle16.v v1, (a1)
+# CHECK-NEXT: - - - - 2.00 - 2.00 - vle32.v v1, (a1)
+# CHECK-NEXT: - - - - 4.00 - 4.00 - vle64.v v1, (a1)
+# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: - - - - 512.00 - 512.00 - vlse8.v v1, (a1), a2
+# CHECK-NEXT: - - - - 256.00 - 256.00 - vlse16.v v1, (a1), a2
+# CHECK-NEXT: - - - - 128.00 - 128.00 - vlse32.v v1, (a1), a2
+# CHECK-NEXT: - - - - 8.00 - 8.00 - vlse64.v v1, (a1), a2
+# CHECK-NEXT: - - - - 512.00 - 512.00 - vlse8.v v1, (a1), zero
+# CHECK-NEXT: - - - - 256.00 - 256.00 - vlse16.v v1, (a1), zero
+# CHECK-NEXT: - - - - 128.00 - 128.00 - vlse32.v v1, (a1), zero
+# CHECK-NEXT: - - - - 8.00 - 8.00 - vlse64.v v1, (a1), zero
+# CHECK-NEXT: - - - - 1.00 - 1.00 - vle8.v v1, (a1)
+# CHECK-NEXT: - - - - 1.00 - 1.00 - vle16.v v1, (a1)
+# CHECK-NEXT: - - - - 1.00 - 1.00 - vle32.v v1, (a1)
+# CHECK-NEXT: - - - - 2.00 - 2.00 - vle64.v v1, (a1)
More information about the llvm-commits
mailing list