[llvm] [TableGen] Add support for per-write cycle tunables (PR #125870)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 13 03:05:10 PST 2025
https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/125870
>From 6eddb57953e186f97f10934022fcc9e96d0a0835 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 23 Jan 2025 04:04:21 -0800
Subject: [PATCH 1/6] Pre-commit test
---
llvm/test/TableGen/PreWriteCycleCount.td | 39 ++++++++++++++++++++++++
1 file changed, 39 insertions(+)
create mode 100644 llvm/test/TableGen/PreWriteCycleCount.td
diff --git a/llvm/test/TableGen/PreWriteCycleCount.td b/llvm/test/TableGen/PreWriteCycleCount.td
new file mode 100644
index 0000000000000..d3e7006795cff
--- /dev/null
+++ b/llvm/test/TableGen/PreWriteCycleCount.td
@@ -0,0 +1,39 @@
+// RUN: llvm-tblgen -gen-subtarget -I %p/../../include %s 2>&1 | FileCheck %s
+
+// Make sure that ReadAdvance entries with multiple writes are correctly
+// handled.
+
+include "llvm/Target/Target.td"
+
+def MyTarget : Target;
+
+let OutOperandList = (outs), InOperandList = (ins) in {
+ def Inst_A : Instruction;
+ def Inst_B : Instruction;
+ def Inst_C : Instruction;
+}
+
+let CompleteModel = 0 in {
+ def SchedModel_A: SchedMachineModel;
+}
+
+def Read_D : SchedRead;
+
+// CHECK: extern const llvm::MCReadAdvanceEntry MyTargetReadAdvanceTable[] = {
+// CHECK-NEXT: {0, 0, 0}, // Invalid
+// CHECK-NEXT: {0, 3, 2} // #1
+// CHECK-NEXT: }; // MyTargetReadAdvanceTable
+
+let SchedModel = SchedModel_A in {
+ def Write_A : SchedWriteRes<[]>;
+ def Write_B : SchedWriteRes<[]>;
+ def Write_C : SchedWriteRes<[]>;
+
+ def : InstRW<[Write_A], (instrs Inst_A)>;
+ def : InstRW<[Write_B], (instrs Inst_B)>;
+ def : InstRW<[Write_C, Read_D], (instrs Inst_C)>;
+
+ def : ReadAdvance<Read_D, 2, [Write_C]>;
+}
+
+def ProcessorA: ProcessorModel<"ProcessorA", SchedModel_A, []>;
>From 2c41e9329a575ba4aed232276a19f31fcb26015d Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 22 Jan 2025 10:26:22 -0800
Subject: [PATCH 2/6] [TableGen] Add support for per-write cycle tunables
This patch adds support for describing per-write resource cycle counts
for ReadAdvance records via a new optional field called `tunables'.
This makes it possible to declare ReadAdvance records such as:
def : ReadAdvance<Read_C, 1, [Write_A, Write_B], [2]>;
The above will effectively declare two entries in the ReadAdvance
table for Read_C, one for Write_A with a cycle count of 1+2, and one for
Write_B with a cycle count of 1+0 (omitted values are assumed to be 0).
The field `tunables' provides a list of deltas relative to the base
`cycles' count of the ReadAdvance. Since the field is optional and
defaults to an empty list (missing entries are treated as 0), this
change doesn't affect current targets.
---
llvm/include/llvm/Target/TargetSchedule.td | 14 +++++++++-----
llvm/test/TableGen/PreWriteCycleCount.td | 6 ++++--
llvm/utils/TableGen/SubtargetEmitter.cpp | 19 +++++++++++++------
3 files changed, 26 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td
index 2562ed0901303..4b82c1e2410fc 100644
--- a/llvm/include/llvm/Target/TargetSchedule.td
+++ b/llvm/include/llvm/Target/TargetSchedule.td
@@ -321,9 +321,11 @@ class SchedWriteRes<list<ProcResourceKind> resources> : SchedWrite,
// Define values common to ReadAdvance and SchedReadAdvance.
//
// SchedModel ties these resources to a processor.
-class ProcReadAdvance<int cycles, list<SchedWrite> writes = []> {
+class ProcReadAdvance<int cycles, list<SchedWrite> writes = [],
+ list<int> tunables = []> {
int Cycles = cycles;
list<SchedWrite> ValidWrites = writes;
+ list<int> CycleTunables = tunables;
// Allow a processor to mark some scheduling classes as unsupported
// for stronger verification.
bit Unsupported = false;
@@ -340,15 +342,17 @@ class ProcReadAdvance<int cycles, list<SchedWrite> writes = []> {
// indicate operands that are always read this number of Cycles later
// than a normal register read, allowing the read's parent instruction
// to issue earlier relative to the writer.
-class ReadAdvance<SchedRead read, int cycles, list<SchedWrite> writes = []>
- : ProcReadAdvance<cycles, writes> {
+class ReadAdvance<SchedRead read, int cycles, list<SchedWrite> writes = [],
+ list<int> tunables = []>
+ : ProcReadAdvance<cycles, writes, tunables> {
SchedRead ReadType = read;
}
// Directly associate a new SchedRead type with a delay and optional
// pipeline bypass. For use with InstRW or ItinRW.
-class SchedReadAdvance<int cycles, list<SchedWrite> writes = []> : SchedRead,
- ProcReadAdvance<cycles, writes>;
+class SchedReadAdvance<int cycles, list<SchedWrite> writes = [],
+ list<int> tunables = []>
+ : SchedRead, ProcReadAdvance<cycles, writes, tunables>;
// Define SchedRead defaults. Reads seldom need special treatment.
def ReadDefault : SchedRead;
diff --git a/llvm/test/TableGen/PreWriteCycleCount.td b/llvm/test/TableGen/PreWriteCycleCount.td
index d3e7006795cff..40260a9bacef4 100644
--- a/llvm/test/TableGen/PreWriteCycleCount.td
+++ b/llvm/test/TableGen/PreWriteCycleCount.td
@@ -21,7 +21,9 @@ def Read_D : SchedRead;
// CHECK: extern const llvm::MCReadAdvanceEntry MyTargetReadAdvanceTable[] = {
// CHECK-NEXT: {0, 0, 0}, // Invalid
-// CHECK-NEXT: {0, 3, 2} // #1
+// CHECK-NEXT: {0, 1, 1}, // #1
+// CHECK-NEXT: {0, 2, 3}, // #2
+// CHECK-NEXT: {0, 3, 2} // #3
// CHECK-NEXT: }; // MyTargetReadAdvanceTable
let SchedModel = SchedModel_A in {
@@ -33,7 +35,7 @@ let SchedModel = SchedModel_A in {
def : InstRW<[Write_B], (instrs Inst_B)>;
def : InstRW<[Write_C, Read_D], (instrs Inst_C)>;
- def : ReadAdvance<Read_D, 2, [Write_C]>;
+ def : ReadAdvance<Read_D, 2, [Write_A, Write_B, Write_C], [-1, 1]>;
}
def ProcessorA: ProcessorModel<"ProcessorA", SchedModel_A, []>;
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 49362ff5ef655..ec09902927382 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -1308,23 +1308,30 @@ void SubtargetEmitter::genSchedClassTables(const CodeGenProcModel &ProcModel,
}
ConstRecVec ValidWrites =
ReadAdvance->getValueAsListOfDefs("ValidWrites");
- IdxVec WriteIDs;
+ std::vector<int64_t> CycleTunables =
+ ReadAdvance->getValueAsListOfInts("CycleTunables");
+ std::vector<std::pair<unsigned, int>> WriteIDs;
+ if (!CycleTunables.empty() && CycleTunables.size() > ValidWrites.size())
+ PrintFatalError(ReadAdvance->getLoc(),
+ "If specified, CycleTunables must have at most the "
+ "same number of elements of ValidWrites.\n");
+ CycleTunables.resize(ValidWrites.size(), 0);
if (ValidWrites.empty())
- WriteIDs.push_back(0);
+ WriteIDs.push_back(std::make_pair(0, 0));
else {
- for (const Record *VW : ValidWrites) {
+ for (const auto [VW, CT] : zip_equal(ValidWrites, CycleTunables)) {
unsigned WriteID = SchedModels.getSchedRWIdx(VW, /*IsRead=*/false);
assert(WriteID != 0 &&
"Expected a valid SchedRW in the list of ValidWrites");
- WriteIDs.push_back(WriteID);
+ WriteIDs.push_back(std::make_pair(WriteID, CT));
}
}
llvm::sort(WriteIDs);
- for (unsigned W : WriteIDs) {
+ for (const auto &[W, T] : WriteIDs) {
MCReadAdvanceEntry RAEntry;
RAEntry.UseIdx = UseIdx;
RAEntry.WriteResourceID = W;
- RAEntry.Cycles = ReadAdvance->getValueAsInt("Cycles");
+ RAEntry.Cycles = ReadAdvance->getValueAsInt("Cycles") + T;
ReadAdvanceEntries.push_back(RAEntry);
}
}
>From 6d553fedf7657a2ce3ca26916362840c9697d8cc Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 5 Feb 2025 15:41:03 +0000
Subject: [PATCH 3/6] Fix typo in name
---
.../TableGen/{PreWriteCycleCount.td => PerWriteCycleCount.td} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename llvm/test/TableGen/{PreWriteCycleCount.td => PerWriteCycleCount.td} (100%)
diff --git a/llvm/test/TableGen/PreWriteCycleCount.td b/llvm/test/TableGen/PerWriteCycleCount.td
similarity index 100%
rename from llvm/test/TableGen/PreWriteCycleCount.td
rename to llvm/test/TableGen/PerWriteCycleCount.td
>From c84d487438d1b6ab4850b83527595f2e95b72f07 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 5 Feb 2025 10:02:27 -0800
Subject: [PATCH 4/6] Remove redundant !CycleTunables.empty()
---
llvm/utils/TableGen/SubtargetEmitter.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index ec09902927382..e0c97b97516c9 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -1311,7 +1311,7 @@ void SubtargetEmitter::genSchedClassTables(const CodeGenProcModel &ProcModel,
std::vector<int64_t> CycleTunables =
ReadAdvance->getValueAsListOfInts("CycleTunables");
std::vector<std::pair<unsigned, int>> WriteIDs;
- if (!CycleTunables.empty() && CycleTunables.size() > ValidWrites.size())
+ if (CycleTunables.size() > ValidWrites.size())
PrintFatalError(ReadAdvance->getLoc(),
"If specified, CycleTunables must have at most the "
"same number of elements of ValidWrites.\n");
>From 5ed15458d336dfeb4b82b1239938c5952ef7935d Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 12 Feb 2025 19:33:00 +0000
Subject: [PATCH 5/6] Use emplace_back instead of push_back(make_pair)
---
llvm/utils/TableGen/SubtargetEmitter.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index e0c97b97516c9..0becc7570e551 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -1317,13 +1317,13 @@ void SubtargetEmitter::genSchedClassTables(const CodeGenProcModel &ProcModel,
"same number of elements of ValidWrites.\n");
CycleTunables.resize(ValidWrites.size(), 0);
if (ValidWrites.empty())
- WriteIDs.push_back(std::make_pair(0, 0));
+ WriteIDs.emplace_back(0, 0);
else {
for (const auto [VW, CT] : zip_equal(ValidWrites, CycleTunables)) {
unsigned WriteID = SchedModels.getSchedRWIdx(VW, /*IsRead=*/false);
assert(WriteID != 0 &&
"Expected a valid SchedRW in the list of ValidWrites");
- WriteIDs.push_back(std::make_pair(WriteID, CT));
+ WriteIDs.emplace_back(WriteID, CT);
}
}
llvm::sort(WriteIDs);
>From c3e71f03dbaf1b8b290a7491028fc8dfc45ca0c1 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 12 Feb 2025 20:08:07 +0000
Subject: [PATCH 6/6] Assert `tunables' in ProcReadAdvance and add test
---
llvm/include/llvm/Target/TargetSchedule.td | 2 ++
llvm/test/TableGen/PerWriteCycleCount.td | 7 +++++++
llvm/utils/TableGen/SubtargetEmitter.cpp | 5 +----
3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td
index 4b82c1e2410fc..f55bff16dcecd 100644
--- a/llvm/include/llvm/Target/TargetSchedule.td
+++ b/llvm/include/llvm/Target/TargetSchedule.td
@@ -323,6 +323,8 @@ class SchedWriteRes<list<ProcResourceKind> resources> : SchedWrite,
// SchedModel ties these resources to a processor.
class ProcReadAdvance<int cycles, list<SchedWrite> writes = [],
list<int> tunables = []> {
+ assert !le(!size(tunables), !size(writes)),
+ "cannot have more `tunables' than `writes'";
int Cycles = cycles;
list<SchedWrite> ValidWrites = writes;
list<int> CycleTunables = tunables;
diff --git a/llvm/test/TableGen/PerWriteCycleCount.td b/llvm/test/TableGen/PerWriteCycleCount.td
index 40260a9bacef4..ac60d8c438834 100644
--- a/llvm/test/TableGen/PerWriteCycleCount.td
+++ b/llvm/test/TableGen/PerWriteCycleCount.td
@@ -1,4 +1,5 @@
// RUN: llvm-tblgen -gen-subtarget -I %p/../../include %s 2>&1 | FileCheck %s
+// RUN: not llvm-tblgen -gen-subtarget -I %p/../../include -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
// Make sure that ReadAdvance entries with multiple writes are correctly
// handled.
@@ -18,6 +19,7 @@ let CompleteModel = 0 in {
}
def Read_D : SchedRead;
+def Read_E : SchedRead;
// CHECK: extern const llvm::MCReadAdvanceEntry MyTargetReadAdvanceTable[] = {
// CHECK-NEXT: {0, 0, 0}, // Invalid
@@ -36,6 +38,11 @@ let SchedModel = SchedModel_A in {
def : InstRW<[Write_C, Read_D], (instrs Inst_C)>;
def : ReadAdvance<Read_D, 2, [Write_A, Write_B, Write_C], [-1, 1]>;
+
+#ifdef ERROR1
+// ERROR1: error: assertion failed: cannot have more `tunables' than `writes'
+ def : ReadAdvance<Read_E, 2, [Write_A, Write_B, Write_C], [1, 2, 3, 4]>;
+#endif
}
def ProcessorA: ProcessorModel<"ProcessorA", SchedModel_A, []>;
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 0becc7570e551..aec05f1ae7742 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -1311,10 +1311,7 @@ void SubtargetEmitter::genSchedClassTables(const CodeGenProcModel &ProcModel,
std::vector<int64_t> CycleTunables =
ReadAdvance->getValueAsListOfInts("CycleTunables");
std::vector<std::pair<unsigned, int>> WriteIDs;
- if (CycleTunables.size() > ValidWrites.size())
- PrintFatalError(ReadAdvance->getLoc(),
- "If specified, CycleTunables must have at most the "
- "same number of elements of ValidWrites.\n");
+ assert(CycleTunables.size() <= ValidWrites.size() && "Bad ReadAdvance");
CycleTunables.resize(ValidWrites.size(), 0);
if (ValidWrites.empty())
WriteIDs.emplace_back(0, 0);
More information about the llvm-commits
mailing list