[llvm] [TableGen] Minor cleanup in `StringToOffsetTable` (PR #147712)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 9 10:28:54 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-adt
Author: Rahul Joshi (jurahul)
<details>
<summary>Changes</summary>
Make `AppendZero` a class member instead of an argument to `GetOrAddStringOffset` to reflect the intended usage that for a given `StringToOffsetTable`, all strings must use the same value of `AppendZero`.
Modify `EmitStringTableDef` to drop the `Indent` argument as its always set to `""`, and to fail if it's called for a table with non-null-terminated strings.
---
Full diff: https://github.com/llvm/llvm-project/pull/147712.diff
8 Files Affected:
- (modified) llvm/include/llvm/ADT/StringMapEntry.h (+1-1)
- (modified) llvm/include/llvm/TableGen/StringToOffsetTable.h (+5-5)
- (modified) llvm/lib/TableGen/StringToOffsetTable.cpp (+17-12)
- (modified) llvm/test/TableGen/MixedCasedMnemonic.td (+1-1)
- (modified) llvm/test/TableGen/SDNodeInfoEmitter/basic.td (+2-2)
- (modified) llvm/utils/TableGen/AsmMatcherEmitter.cpp (+3-3)
- (modified) llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp (+1-1)
- (modified) llvm/utils/TableGen/OptionParserEmitter.cpp (+1-1)
``````````diff
diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h
index 842cf704de479..21be5ec343059 100644
--- a/llvm/include/llvm/ADT/StringMapEntry.h
+++ b/llvm/include/llvm/ADT/StringMapEntry.h
@@ -110,7 +110,7 @@ class StringMapEntry final : public StringMapEntryStorage<ValueTy> {
}
/// getKeyData - Return the start of the string data that is the key for this
- /// value. The string data is always stored immediately after the
+ /// value. The string data is always stored immediately after the
/// StringMapEntry object.
const char *getKeyData() const {
return reinterpret_cast<const char *>(this + 1);
diff --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h
index 21795644d4bd6..1550515ce3d7b 100644
--- a/llvm/include/llvm/TableGen/StringToOffsetTable.h
+++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h
@@ -23,9 +23,10 @@ namespace llvm {
class StringToOffsetTable {
StringMap<unsigned> StringOffset;
std::string AggregateString;
+ const bool AppendZero;
public:
- StringToOffsetTable() {
+ StringToOffsetTable(bool AppendZero = true) : AppendZero(AppendZero) {
// Ensure we always put the empty string at offset zero. That lets empty
// initialization also be zero initialization for offsets into the table.
GetOrAddStringOffset("");
@@ -34,7 +35,7 @@ class StringToOffsetTable {
bool empty() const { return StringOffset.empty(); }
size_t size() const { return AggregateString.size(); }
- unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true);
+ unsigned GetOrAddStringOffset(StringRef Str);
// Returns the offset of `Str` in the table if its preset, else return
// std::nullopt.
@@ -45,7 +46,7 @@ class StringToOffsetTable {
return II->second;
}
- // Emit a string table definition with the provided name and indent.
+ // Emit a string table definition with the provided name.
//
// When possible, this uses string-literal concatenation to emit the string
// contents in a readable and searchable way. However, for (very) large string
@@ -56,8 +57,7 @@ class StringToOffsetTable {
// The string table, and its input string contents, are always emitted as both
// `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be
// valid identifiers to declare.
- void EmitStringTableDef(raw_ostream &OS, const Twine &Name,
- const Twine &Indent = "") const;
+ void EmitStringTableDef(raw_ostream &OS, const Twine &Name) const;
// Emit the string as one single string.
void EmitString(raw_ostream &O) const;
diff --git a/llvm/lib/TableGen/StringToOffsetTable.cpp b/llvm/lib/TableGen/StringToOffsetTable.cpp
index 0db3f35f1c4b3..17e1e660c15ee 100644
--- a/llvm/lib/TableGen/StringToOffsetTable.cpp
+++ b/llvm/lib/TableGen/StringToOffsetTable.cpp
@@ -9,32 +9,37 @@
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Main.h"
using namespace llvm;
-unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str,
- bool appendZero) {
+unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str) {
auto [II, Inserted] = StringOffset.insert({Str, size()});
if (Inserted) {
// Add the string to the aggregate if this is the first time found.
AggregateString.append(Str.begin(), Str.end());
- if (appendZero)
+ if (AppendZero)
AggregateString += '\0';
}
return II->second;
}
-void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
- const Twine &Indent) const {
+void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS,
+ const Twine &Name) const {
+ // This generates a `llvm::StringTable` which expects that entries are null
+ // terminated. So fail with an error if `AppendZero` is false.
+ if (!AppendZero)
+ PrintFatalError("llvm::StringTable requires null terminated strings");
+
OS << formatv(R"(
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverlength-strings"
#endif
-{0}static constexpr char {1}Storage[] = )",
- Indent, Name);
+static constexpr char {}Storage[] = )",
+ Name);
// MSVC silently miscompiles string literals longer than 64k in some
// circumstances. The build system sets EmitLongStrLiterals to false when it
@@ -54,7 +59,7 @@ void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
"Expected empty string at the end due to terminators!");
Strings.pop_back();
for (StringRef Str : Strings) {
- OS << LineSep << Indent << " ";
+ OS << LineSep << " ";
// If we can, just emit this as a string literal to be concatenated.
if (!UseChars) {
OS << "\"";
@@ -71,17 +76,17 @@ void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
}
OS << CharSep << "'\\0'";
}
- OS << LineSep << Indent << (UseChars ? "};" : " ;");
+ OS << LineSep << (UseChars ? "};" : " ;");
OS << formatv(R"(
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
-{0}static constexpr llvm::StringTable {1} =
-{0} {1}Storage;
+static constexpr llvm::StringTable
+{0} = {0}Storage;
)",
- Indent, Name);
+ Name);
}
void StringToOffsetTable::EmitString(raw_ostream &O) const {
diff --git a/llvm/test/TableGen/MixedCasedMnemonic.td b/llvm/test/TableGen/MixedCasedMnemonic.td
index 14ab104c7e120..cb224ac59c6de 100644
--- a/llvm/test/TableGen/MixedCasedMnemonic.td
+++ b/llvm/test/TableGen/MixedCasedMnemonic.td
@@ -41,7 +41,7 @@ def :MnemonicAlias<"InstB", "BInst">;
// Check that the matcher lower()s the mnemonics it matches.
// MATCHER: static const char MnemonicTable[] =
-// MATCHER-NEXT: "\000\005ainst\005binst";
+// MATCHER-NEXT: "\005ainst\005binst";
// Check that aInst appears before BInst in the match table.
// This shows that the mnemonics are sorted in a case-insensitive way,
diff --git a/llvm/test/TableGen/SDNodeInfoEmitter/basic.td b/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
index b8bff520bbcaa..2b4c76a0d4543 100644
--- a/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
+++ b/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
@@ -35,8 +35,8 @@ def MyTarget : Target;
// CHECK-NEXT: #pragma GCC diagnostic pop
// CHECK-NEXT: #endif
// CHECK-EMPTY:
-// CHECK-NEXT: static constexpr llvm::StringTable MyTargetSDNodeNames =
-// CHECK-NEXT: MyTargetSDNodeNamesStorage;
+// CHECK-NEXT: static constexpr llvm::StringTable
+// CHECK-NEXT: MyTargetSDNodeNames = MyTargetSDNodeNamesStorage;
// CHECK-EMPTY:
// CHECK-NEXT: static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
// CHECK-NEXT: /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 6dec4110decbe..bfd158614ae39 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -3440,7 +3440,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (!ReportMultipleNearMisses)
emitAsmTiedOperandConstraints(Target, Info, OS, HasOptionalOperands);
- StringToOffsetTable StringTable;
+ StringToOffsetTable StringTable(/*AppendZero=*/false);
size_t MaxNumOperands = 0;
unsigned MaxMnemonicIndex = 0;
@@ -3451,8 +3451,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// Store a pascal-style length byte in the mnemonic.
std::string LenMnemonic = char(MI->Mnemonic.size()) + MI->Mnemonic.lower();
- MaxMnemonicIndex = std::max(
- MaxMnemonicIndex, StringTable.GetOrAddStringOffset(LenMnemonic, false));
+ MaxMnemonicIndex = std::max(MaxMnemonicIndex,
+ StringTable.GetOrAddStringOffset(LenMnemonic));
}
OS << "static const char MnemonicTable[] =\n";
diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index 2876a0e2ad546..ac5c455ed63ce 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -252,7 +252,7 @@ void IntrinsicEmitter::EmitIntrinsicToNameTable(
)";
- Table.EmitStringTableDef(OS, "IntrinsicNameTable", /*Indent=*/"");
+ Table.EmitStringTableDef(OS, "IntrinsicNameTable");
OS << R"(
static constexpr unsigned IntrinsicNameOffsetTable[] = {
diff --git a/llvm/utils/TableGen/OptionParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp
index f4964282934eb..a470fbbcadd58 100644
--- a/llvm/utils/TableGen/OptionParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptionParserEmitter.cpp
@@ -291,7 +291,7 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
OS << "/////////\n";
OS << "// String table\n\n";
OS << "#ifdef OPTTABLE_STR_TABLE_CODE\n";
- Table.EmitStringTableDef(OS, "OptionStrTable", /*Indent=*/"");
+ Table.EmitStringTableDef(OS, "OptionStrTable");
OS << "#endif // OPTTABLE_STR_TABLE_CODE\n\n";
// Dump prefixes.
``````````
</details>
https://github.com/llvm/llvm-project/pull/147712
More information about the llvm-commits
mailing list