[llvm] [TableGen] Minor cleanup in `StringToOffsetTable` (PR #147712)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 9 10:28:54 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-adt

Author: Rahul Joshi (jurahul)

<details>
<summary>Changes</summary>

Make `AppendZero` a class member instead of an argument to `GetOrAddStringOffset` to reflect the intended usage that for a given `StringToOffsetTable`, all strings must use the same value of `AppendZero`.

Modify `EmitStringTableDef` to drop the `Indent` argument as its always set to `""`, and to fail if it's called for a table with non-null-terminated strings.

---
Full diff: https://github.com/llvm/llvm-project/pull/147712.diff


8 Files Affected:

- (modified) llvm/include/llvm/ADT/StringMapEntry.h (+1-1) 
- (modified) llvm/include/llvm/TableGen/StringToOffsetTable.h (+5-5) 
- (modified) llvm/lib/TableGen/StringToOffsetTable.cpp (+17-12) 
- (modified) llvm/test/TableGen/MixedCasedMnemonic.td (+1-1) 
- (modified) llvm/test/TableGen/SDNodeInfoEmitter/basic.td (+2-2) 
- (modified) llvm/utils/TableGen/AsmMatcherEmitter.cpp (+3-3) 
- (modified) llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp (+1-1) 
- (modified) llvm/utils/TableGen/OptionParserEmitter.cpp (+1-1) 


``````````diff
diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h
index 842cf704de479..21be5ec343059 100644
--- a/llvm/include/llvm/ADT/StringMapEntry.h
+++ b/llvm/include/llvm/ADT/StringMapEntry.h
@@ -110,7 +110,7 @@ class StringMapEntry final : public StringMapEntryStorage<ValueTy> {
   }
 
   /// getKeyData - Return the start of the string data that is the key for this
-  /// value.  The string data is always stored immediately after the
+  /// value. The string data is always stored immediately after the
   /// StringMapEntry object.
   const char *getKeyData() const {
     return reinterpret_cast<const char *>(this + 1);
diff --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h
index 21795644d4bd6..1550515ce3d7b 100644
--- a/llvm/include/llvm/TableGen/StringToOffsetTable.h
+++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h
@@ -23,9 +23,10 @@ namespace llvm {
 class StringToOffsetTable {
   StringMap<unsigned> StringOffset;
   std::string AggregateString;
+  const bool AppendZero;
 
 public:
-  StringToOffsetTable() {
+  StringToOffsetTable(bool AppendZero = true) : AppendZero(AppendZero) {
     // Ensure we always put the empty string at offset zero. That lets empty
     // initialization also be zero initialization for offsets into the table.
     GetOrAddStringOffset("");
@@ -34,7 +35,7 @@ class StringToOffsetTable {
   bool empty() const { return StringOffset.empty(); }
   size_t size() const { return AggregateString.size(); }
 
-  unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true);
+  unsigned GetOrAddStringOffset(StringRef Str);
 
   // Returns the offset of `Str` in the table if its preset, else return
   // std::nullopt.
@@ -45,7 +46,7 @@ class StringToOffsetTable {
     return II->second;
   }
 
-  // Emit a string table definition with the provided name and indent.
+  // Emit a string table definition with the provided name.
   //
   // When possible, this uses string-literal concatenation to emit the string
   // contents in a readable and searchable way. However, for (very) large string
@@ -56,8 +57,7 @@ class StringToOffsetTable {
   // The string table, and its input string contents, are always emitted as both
   // `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be
   // valid identifiers to declare.
-  void EmitStringTableDef(raw_ostream &OS, const Twine &Name,
-                          const Twine &Indent = "") const;
+  void EmitStringTableDef(raw_ostream &OS, const Twine &Name) const;
 
   // Emit the string as one single string.
   void EmitString(raw_ostream &O) const;
diff --git a/llvm/lib/TableGen/StringToOffsetTable.cpp b/llvm/lib/TableGen/StringToOffsetTable.cpp
index 0db3f35f1c4b3..17e1e660c15ee 100644
--- a/llvm/lib/TableGen/StringToOffsetTable.cpp
+++ b/llvm/lib/TableGen/StringToOffsetTable.cpp
@@ -9,32 +9,37 @@
 #include "llvm/TableGen/StringToOffsetTable.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Main.h"
 
 using namespace llvm;
 
-unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str,
-                                                   bool appendZero) {
+unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str) {
   auto [II, Inserted] = StringOffset.insert({Str, size()});
   if (Inserted) {
     // Add the string to the aggregate if this is the first time found.
     AggregateString.append(Str.begin(), Str.end());
-    if (appendZero)
+    if (AppendZero)
       AggregateString += '\0';
   }
 
   return II->second;
 }
 
-void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
-                                             const Twine &Indent) const {
+void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS,
+                                             const Twine &Name) const {
+  // This generates a `llvm::StringTable` which expects that entries are null
+  // terminated. So fail with an error if `AppendZero` is false.
+  if (!AppendZero)
+    PrintFatalError("llvm::StringTable requires null terminated strings");
+
   OS << formatv(R"(
 #ifdef __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Woverlength-strings"
 #endif
-{0}static constexpr char {1}Storage[] = )",
-                Indent, Name);
+static constexpr char {}Storage[] = )",
+                Name);
 
   // MSVC silently miscompiles string literals longer than 64k in some
   // circumstances. The build system sets EmitLongStrLiterals to false when it
@@ -54,7 +59,7 @@ void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
          "Expected empty string at the end due to terminators!");
   Strings.pop_back();
   for (StringRef Str : Strings) {
-    OS << LineSep << Indent << "  ";
+    OS << LineSep << "  ";
     // If we can, just emit this as a string literal to be concatenated.
     if (!UseChars) {
       OS << "\"";
@@ -71,17 +76,17 @@ void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
     }
     OS << CharSep << "'\\0'";
   }
-  OS << LineSep << Indent << (UseChars ? "};" : "  ;");
+  OS << LineSep << (UseChars ? "};" : "  ;");
 
   OS << formatv(R"(
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
 
-{0}static constexpr llvm::StringTable {1} =
-{0}    {1}Storage;
+static constexpr llvm::StringTable
+{0} = {0}Storage;
 )",
-                Indent, Name);
+                Name);
 }
 
 void StringToOffsetTable::EmitString(raw_ostream &O) const {
diff --git a/llvm/test/TableGen/MixedCasedMnemonic.td b/llvm/test/TableGen/MixedCasedMnemonic.td
index 14ab104c7e120..cb224ac59c6de 100644
--- a/llvm/test/TableGen/MixedCasedMnemonic.td
+++ b/llvm/test/TableGen/MixedCasedMnemonic.td
@@ -41,7 +41,7 @@ def :MnemonicAlias<"InstB", "BInst">;
 
 // Check that the matcher lower()s the mnemonics it matches.
 // MATCHER: static const char MnemonicTable[] =
-// MATCHER-NEXT: "\000\005ainst\005binst";
+// MATCHER-NEXT: "\005ainst\005binst";
 
 // Check that aInst appears before BInst in the match table.
 // This shows that the mnemonics are sorted in a case-insensitive way,
diff --git a/llvm/test/TableGen/SDNodeInfoEmitter/basic.td b/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
index b8bff520bbcaa..2b4c76a0d4543 100644
--- a/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
+++ b/llvm/test/TableGen/SDNodeInfoEmitter/basic.td
@@ -35,8 +35,8 @@ def MyTarget : Target;
 // CHECK-NEXT:  #pragma GCC diagnostic pop
 // CHECK-NEXT:  #endif
 // CHECK-EMPTY:
-// CHECK-NEXT:  static constexpr llvm::StringTable MyTargetSDNodeNames =
-// CHECK-NEXT:      MyTargetSDNodeNamesStorage;
+// CHECK-NEXT:  static constexpr llvm::StringTable
+// CHECK-NEXT:  MyTargetSDNodeNames = MyTargetSDNodeNamesStorage;
 // CHECK-EMPTY:
 // CHECK-NEXT:  static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
 // CHECK-NEXT:    /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 6dec4110decbe..bfd158614ae39 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -3440,7 +3440,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   if (!ReportMultipleNearMisses)
     emitAsmTiedOperandConstraints(Target, Info, OS, HasOptionalOperands);
 
-  StringToOffsetTable StringTable;
+  StringToOffsetTable StringTable(/*AppendZero=*/false);
 
   size_t MaxNumOperands = 0;
   unsigned MaxMnemonicIndex = 0;
@@ -3451,8 +3451,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
 
     // Store a pascal-style length byte in the mnemonic.
     std::string LenMnemonic = char(MI->Mnemonic.size()) + MI->Mnemonic.lower();
-    MaxMnemonicIndex = std::max(
-        MaxMnemonicIndex, StringTable.GetOrAddStringOffset(LenMnemonic, false));
+    MaxMnemonicIndex = std::max(MaxMnemonicIndex,
+                                StringTable.GetOrAddStringOffset(LenMnemonic));
   }
 
   OS << "static const char MnemonicTable[] =\n";
diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index 2876a0e2ad546..ac5c455ed63ce 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -252,7 +252,7 @@ void IntrinsicEmitter::EmitIntrinsicToNameTable(
 
 )";
 
-  Table.EmitStringTableDef(OS, "IntrinsicNameTable", /*Indent=*/"");
+  Table.EmitStringTableDef(OS, "IntrinsicNameTable");
 
   OS << R"(
 static constexpr unsigned IntrinsicNameOffsetTable[] = {
diff --git a/llvm/utils/TableGen/OptionParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp
index f4964282934eb..a470fbbcadd58 100644
--- a/llvm/utils/TableGen/OptionParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptionParserEmitter.cpp
@@ -291,7 +291,7 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "/////////\n";
   OS << "// String table\n\n";
   OS << "#ifdef OPTTABLE_STR_TABLE_CODE\n";
-  Table.EmitStringTableDef(OS, "OptionStrTable", /*Indent=*/"");
+  Table.EmitStringTableDef(OS, "OptionStrTable");
   OS << "#endif // OPTTABLE_STR_TABLE_CODE\n\n";
 
   // Dump prefixes.

``````````

</details>


https://github.com/llvm/llvm-project/pull/147712


More information about the llvm-commits mailing list