[llvm] [TableGen] Allow empty terminator in SequenceToOffsetTable (PR #119751)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 12 12:02:04 PST 2024
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/119751
>From 2ecc51c1f08b078dce441cfb343464044a139458 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Thu, 12 Dec 2024 22:15:21 +0300
Subject: [PATCH] [TableGen] Allow empty terminator in SequenceToOffsetTable
Some clients do not want to emit a terminator after each sub-sequence
(they have other means of determining the length of the sub-sequence).
This moves `Term` argument from `emit` method to the constructor
and makes it optional. It couldn't be made optional while still
on the `emit` method because if the terminator wasn't specified,
it has to be taken into account in `layout` method as well.
The fact that `layout` method was called is now recorded in a dedicated
member variable, `IsLaidOut`. `Entries != 0` can no longer be used
to reliably check if `layout` method was called because it may be zero
for a different reason: the terminator wasn't specified and all added
sequences (if any) were empty.
This reduces the size of `*LaneMaskLists` and `*SubRegIdxLists` a bit
and resolves the removed FIXME.
---
llvm/utils/TableGen/AsmWriterEmitter.cpp | 4 +-
.../TableGen/Basic/SequenceToOffsetTable.h | 47 ++++++++++++-------
llvm/utils/TableGen/DFAEmitter.cpp | 12 ++---
llvm/utils/TableGen/DXILEmitter.cpp | 4 +-
llvm/utils/TableGen/InstrInfoEmitter.cpp | 2 +-
llvm/utils/TableGen/IntrinsicEmitter.cpp | 2 +-
llvm/utils/TableGen/RegisterInfoEmitter.cpp | 29 ++++++------
7 files changed, 56 insertions(+), 44 deletions(-)
diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp
index 9880214a37368f..64cc80afd0900e 100644
--- a/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -338,7 +338,7 @@ void AsmWriterEmitter::EmitGetMnemonic(
<< "::getMnemonic(const MCInst &MI) const {\n";
// Build an aggregate string, and build a table of offsets into it.
- SequenceToOffsetTable<std::string> StringTable;
+ SequenceToOffsetTable<std::string> StringTable(/*Terminator=*/'\0');
/// OpcodeInfo - This encodes the index of the string to use for the first
/// chunk of the output as well as indices used for operand printing.
@@ -583,7 +583,7 @@ void AsmWriterEmitter::EmitPrintInstruction(
static void
emitRegisterNameString(raw_ostream &O, StringRef AltName,
const std::deque<CodeGenRegister> &Registers) {
- SequenceToOffsetTable<std::string> StringTable;
+ SequenceToOffsetTable<std::string> StringTable(/*Terminator=*/'\0');
SmallVector<std::string, 4> AsmNames(Registers.size());
unsigned i = 0;
for (const auto &Reg : Registers) {
diff --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
index 497e74afc18ec9..4e1388d3e45285 100644
--- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
+++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
@@ -6,9 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// SequenceToOffsetTable can be used to emit a number of null-terminated
-// sequences as one big array. Use the same memory when a sequence is a suffix
-// of another.
+// SequenceToOffsetTable can be used to emit a number of sequences as one big
+// array. Uses the same memory when a sequence is a suffix of another.
//
//===----------------------------------------------------------------------===//
@@ -65,8 +64,14 @@ class SequenceToOffsetTable {
// Sequences added so far, with suffixes removed.
SeqMap Seqs;
+ // Terminator element to be appended to each added sequence.
+ std::optional<ElemT> Terminator;
+
+ // True if `layout` method was called.
+ bool IsLaidOut = false;
+
// Entries in the final table, or 0 before layout was called.
- unsigned Entries;
+ unsigned Entries = 0;
// isSuffix - Returns true if A is a suffix of B.
static bool isSuffix(const SeqT &A, const SeqT &B) {
@@ -74,12 +79,13 @@ class SequenceToOffsetTable {
}
public:
- SequenceToOffsetTable() : Entries(0) {}
+ explicit SequenceToOffsetTable(std::optional<ElemT> Terminator)
+ : Terminator(Terminator) {}
/// add - Add a sequence to the table.
/// This must be called before layout().
void add(const SeqT &Seq) {
- assert(Entries == 0 && "Cannot call add() after layout()");
+ assert(!IsLaidOut && "Cannot call add() after layout()");
typename SeqMap::iterator I = Seqs.lower_bound(Seq);
// If SeqMap contains a sequence that has Seq as a suffix, I will be
@@ -97,25 +103,27 @@ class SequenceToOffsetTable {
bool empty() const { return Seqs.empty(); }
unsigned size() const {
- assert((empty() || Entries) && "Call layout() before size()");
+ assert(IsLaidOut && "Call layout() before size()");
return Entries;
}
/// layout - Computes the final table layout.
void layout() {
- assert(Entries == 0 && "Can only call layout() once");
+ assert(!IsLaidOut && "Can only call layout() once");
+ IsLaidOut = true;
+
// Lay out the table in Seqs iteration order.
for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E;
++I) {
I->second = Entries;
// Include space for a terminator.
- Entries += I->first.size() + 1;
+ Entries += I->first.size() + Terminator.has_value();
}
}
/// get - Returns the offset of Seq in the final table.
unsigned get(const SeqT &Seq) const {
- assert(Entries && "Call layout() before get()");
+ assert(IsLaidOut && "Call layout() before get()");
typename SeqMap::const_iterator I = Seqs.lower_bound(Seq);
assert(I != Seqs.end() && isSuffix(Seq, I->first) &&
"get() called with sequence that wasn't added first");
@@ -127,10 +135,10 @@ class SequenceToOffsetTable {
/// `\0`. Falls back to emitting a comma-separated integer list if
/// `EmitLongStrLiterals` is false
void emitStringLiteralDef(raw_ostream &OS, const Twine &Decl) const {
- assert(Entries && "Call layout() before emitStringLiteralDef()");
+ assert(IsLaidOut && "Call layout() before emitStringLiteralDef()");
if (!EmitLongStrLiterals) {
OS << Decl << " = {\n";
- emit(OS, printChar, "0");
+ emit(OS, printChar);
OS << " 0\n};\n\n";
return;
}
@@ -143,7 +151,9 @@ class SequenceToOffsetTable {
for (const auto &[Seq, Offset] : Seqs) {
OS << " /* " << Offset << " */ \"";
OS.write_escaped(Seq);
- OS << "\\0\"\n";
+ if (Terminator)
+ OS.write_escaped(StringRef(&*Terminator, 1));
+ OS << "\"\n";
}
OS << "};\n"
<< "#ifdef __GNUC__\n"
@@ -153,16 +163,19 @@ class SequenceToOffsetTable {
/// emit - Print out the table as the body of an array initializer.
/// Use the Print function to print elements.
- void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT),
- const char *Term = "0") const {
- assert((empty() || Entries) && "Call layout() before emit()");
+ void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT)) const {
+ assert(IsLaidOut && "Call layout() before emit()");
for (const auto &[Seq, Offset] : Seqs) {
OS << " /* " << Offset << " */ ";
for (const ElemT &Element : Seq) {
Print(OS, Element);
OS << ", ";
}
- OS << Term << ",\n";
+ if (Terminator) {
+ Print(OS, *Terminator);
+ OS << ',';
+ }
+ OS << '\n';
}
}
};
diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp
index 264cccf6ac0ca6..a42c615879ba74 100644
--- a/llvm/utils/TableGen/DFAEmitter.cpp
+++ b/llvm/utils/TableGen/DFAEmitter.cpp
@@ -117,19 +117,17 @@ void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {
OS << "// transition implies a set of NFA transitions. These are referred\n";
OS << "// to by index in " << Name << "Transitions[].\n";
- SequenceToOffsetTable<DfaTransitionInfo> Table;
+ SequenceToOffsetTable<DfaTransitionInfo> Table(
+ /*Terminator=*/std::pair(0, 0));
std::map<DfaTransitionInfo, unsigned> EmittedIndices;
for (auto &T : DfaTransitions)
Table.add(T.second.second);
Table.layout();
OS << "const std::array<NfaStatePair, " << Table.size() << "> " << Name
<< "TransitionInfo = {{\n";
- Table.emit(
- OS,
- [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
- OS << "{" << P.first << ", " << P.second << "}";
- },
- "{0ULL, 0ULL}");
+ Table.emit(OS, [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
+ OS << "{" << P.first << ", " << P.second << "}";
+ });
OS << "}};\n\n";
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index a0c93bed5ad834..b54949ef2dc1ad 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -449,8 +449,8 @@ static void emitDXILIntrinsicArgSelectTypes(const RecordKeeper &Records,
static void emitDXILOperationTable(ArrayRef<DXILOperationDesc> Ops,
raw_ostream &OS) {
// Collect Names.
- SequenceToOffsetTable<std::string> OpClassStrings;
- SequenceToOffsetTable<std::string> OpStrings;
+ SequenceToOffsetTable<std::string> OpClassStrings(/*Terminator=*/'\0');
+ SequenceToOffsetTable<std::string> OpStrings(/*Terminator=*/'\0');
StringSet<> ClassSet;
for (const auto &Op : Ops) {
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 8c0e27215a7362..912a96c40f70e0 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -988,7 +988,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OS << "extern const " << TargetName << "InstrTable " << TargetName
<< "Descs = {\n {\n";
- SequenceToOffsetTable<std::string> InstrNames;
+ SequenceToOffsetTable<std::string> InstrNames(/*Terminator=*/'\0');
unsigned Num = NumberedInstructions.size();
for (const CodeGenInstruction *Inst : reverse(NumberedInstructions)) {
// Keep a list of the instruction names.
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 093602c3da8045..a0ad8e561aa482 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -338,7 +338,7 @@ void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
// If we can compute a 16/32-bit fixed encoding for this intrinsic, do so and
// capture it in this vector, otherwise store a ~0U.
std::vector<FixedEncodingTy> FixedEncodings;
- SequenceToOffsetTable<TypeSigTy> LongEncodingTable;
+ SequenceToOffsetTable<TypeSigTy> LongEncodingTable(/*Terminator=*/0);
FixedEncodings.reserve(Ints.size());
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index bfcd52da1c39cb..98d752bc07e297 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -288,7 +288,7 @@ void RegisterInfoEmitter::EmitRegUnitPressure(raw_ostream &OS,
<< " return PressureLimitTable[Idx];\n"
<< "}\n\n";
- SequenceToOffsetTable<std::vector<int>> PSetsSeqs;
+ SequenceToOffsetTable<std::vector<int>> PSetsSeqs(/*Terminator=*/-1);
// This table may be larger than NumRCs if some register units needed a list
// of unit sets that did not correspond to a register class.
@@ -309,7 +309,7 @@ void RegisterInfoEmitter::EmitRegUnitPressure(raw_ostream &OS,
OS << "/// Table of pressure sets per register class or unit.\n"
<< "static const int RCSetsTable[] = {\n";
- PSetsSeqs.emit(OS, printInt, "-1");
+ PSetsSeqs.emit(OS, printInt);
OS << "};\n\n";
OS << "/// Get the dimensions of register pressure impacted by this "
@@ -610,7 +610,7 @@ static void printSimpleValueType(raw_ostream &OS, MVT::SimpleValueType VT) {
}
static void printSubRegIndex(raw_ostream &OS, const CodeGenSubRegIndex *Idx) {
- OS << Idx->EnumValue;
+ OS << (Idx ? Idx->EnumValue : 0);
}
// Differentially encoded register and regunit lists allow for better
@@ -869,22 +869,23 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS) {
typedef std::vector<const CodeGenRegister *> RegVec;
// Differentially encoded lists.
- SequenceToOffsetTable<DiffVec> DiffSeqs;
+ SequenceToOffsetTable<DiffVec> DiffSeqs(/*Terminator=*/0);
SmallVector<DiffVec, 4> SubRegLists(Regs.size());
SmallVector<DiffVec, 4> SuperRegLists(Regs.size());
SmallVector<DiffVec, 4> RegUnitLists(Regs.size());
// List of lane masks accompanying register unit sequences.
- SequenceToOffsetTable<MaskVec> LaneMaskSeqs;
+ SequenceToOffsetTable<MaskVec> LaneMaskSeqs(/*Terminator=*/std::nullopt);
SmallVector<MaskVec, 4> RegUnitLaneMasks(Regs.size());
// Keep track of sub-register names as well. These are not differentially
// encoded.
typedef SmallVector<const CodeGenSubRegIndex *, 4> SubRegIdxVec;
- SequenceToOffsetTable<SubRegIdxVec, deref<std::less<>>> SubRegIdxSeqs;
+ SequenceToOffsetTable<SubRegIdxVec, deref<std::less<>>> SubRegIdxSeqs(
+ /*Terminator=*/std::nullopt);
SmallVector<SubRegIdxVec, 4> SubRegIdxLists(Regs.size());
- SequenceToOffsetTable<std::string> RegStrings;
+ SequenceToOffsetTable<std::string> RegStrings(/*Terminator=*/'\0');
// Precompute register lists for the SequenceToOffsetTable.
unsigned i = 0;
@@ -936,9 +937,7 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS) {
// Emit the shared table of regunit lane mask sequences.
OS << "extern const LaneBitmask " << TargetName << "LaneMaskLists[] = {\n";
- // TODO: Omit the terminator since it is never used. The length of this list
- // is known implicitly from the corresponding reg unit list.
- LaneMaskSeqs.emit(OS, printMask, "LaneBitmask::getAll()");
+ LaneMaskSeqs.emit(OS, printMask);
OS << "};\n\n";
// Emit the table of sub-register indexes.
@@ -994,7 +993,7 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS) {
// Loop over all of the register classes... emitting each one.
OS << "namespace { // Register classes...\n";
- SequenceToOffsetTable<std::string> RegClassStrings;
+ SequenceToOffsetTable<std::string> RegClassStrings(/*Terminator=*/'\0');
// Emit the register enum value arrays for each RegisterClass
for (const auto &RC : RegisterClasses) {
@@ -1209,7 +1208,8 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
unsigned NumModes = CGH.getNumModeIds();
// Build a shared array of value types.
- SequenceToOffsetTable<std::vector<MVT::SimpleValueType>> VTSeqs;
+ SequenceToOffsetTable<std::vector<MVT::SimpleValueType>> VTSeqs(
+ /*Terminator=*/MVT::Other);
for (unsigned M = 0; M < NumModes; ++M) {
for (const auto &RC : RegisterClasses) {
std::vector<MVT::SimpleValueType> S;
@@ -1221,7 +1221,7 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
}
VTSeqs.layout();
OS << "\nstatic const MVT::SimpleValueType VTLists[] = {\n";
- VTSeqs.emit(OS, printSimpleValueType, "MVT::Other");
+ VTSeqs.emit(OS, printSimpleValueType);
OS << "};\n";
// Emit SubRegIndex names, skipping 0.
@@ -1307,7 +1307,8 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
// Compress the sub-reg index lists.
typedef std::vector<const CodeGenSubRegIndex *> IdxList;
SmallVector<IdxList, 8> SuperRegIdxLists(RegisterClasses.size());
- SequenceToOffsetTable<IdxList, deref<std::less<>>> SuperRegIdxSeqs;
+ SequenceToOffsetTable<IdxList, deref<std::less<>>> SuperRegIdxSeqs(
+ /*Terminator=*/nullptr);
BitVector MaskBV(RegisterClasses.size());
for (const auto &RC : RegisterClasses) {
More information about the llvm-commits
mailing list