[llvm] [NFC][TableGen] Emit more readable builtin string table. (PR #105445)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 20 16:08:53 PDT 2024


https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/105445

>From d66a26d1ca26574794b063dbdb67b6b2fee0eba5 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Tue, 20 Aug 2024 15:09:52 -0700
Subject: [PATCH] [NFC][TableGen] Emit more readable builtin string table.

- Adopt `SequenceToOffsetTable` to emit the string table in
  `EmitIntrinsicToBuiltinMap`.
- `SequenceToOffsetTable` emits a string table using string literal
  concatenation of individual null-terminated fragments, one fragment on each
  line, making the table more readable as well as searchable.
- Adapt `StringRef` for use as the sequence type in `SequenceToOffsetTable`
  by providing `value_type` and reverse iterators.
- Reduces the string table size for both Clang and MS builtins:
  Clang: 134915 -> 134001 bytes, MS: 68 -> 56 bytes.
---
 llvm/include/llvm/ADT/StringRef.h             | 11 +++++++
 .../TableGen/Basic/SequenceToOffsetTable.h    | 32 ++++++++++++-------
 llvm/utils/TableGen/IntrinsicEmitter.cpp      | 16 +++++-----
 3 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 049f22b03e46e8..32cf0a2218e5e9 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -17,6 +17,7 @@
 #include <cassert>
 #include <cstddef>
 #include <cstring>
+#include <iterator>
 #include <limits>
 #include <string>
 #include <string_view>
@@ -54,6 +55,8 @@ namespace llvm {
     using iterator = const char *;
     using const_iterator = const char *;
     using size_type = size_t;
+    using value_type = char;
+    using reverse_iterator = std::reverse_iterator<iterator>;
 
   private:
     /// The start of the string, in an external buffer.
@@ -112,6 +115,14 @@ namespace llvm {
 
     iterator end() const { return Data + Length; }
 
+    reverse_iterator rbegin() const {
+      return std::make_reverse_iterator(end());
+    }
+
+    reverse_iterator rend() const {
+      return std::make_reverse_iterator(begin());
+    }
+
     const unsigned char *bytes_begin() const {
       return reinterpret_cast<const unsigned char *>(begin());
     }
diff --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
index 09100b39650d81..141f91c14c5c9e 100644
--- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
+++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
@@ -125,8 +125,14 @@ class SequenceToOffsetTable {
   /// `emitStringLiteralDef` - Print out the table as the body of an array
   /// initializer, where each element is a C string literal terminated by
   /// `\0`. Falls back to emitting a comma-separated integer list if
-  /// `EmitLongStrLiterals` is false
-  void emitStringLiteralDef(raw_ostream &OS, const llvm::Twine &Decl) const {
+  /// `EmitLongStrLiterals` is false.
+  ///
+  /// Per https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html, the warning
+  /// `-Woverlength-strings`, which could be triggered by the generated code, is
+  /// not applicable for C++. So if \p IsCPP is true, we skip generating the GCC
+  /// pragmas to ignore this warning.
+  void emitStringLiteralDef(raw_ostream &OS, const llvm::Twine &Decl,
+                            bool IsCPP = false) const {
     assert(Entries && "Call layout() before emitStringLiteralDef()");
     if (!EmitLongStrLiterals) {
       OS << Decl << " = {\n";
@@ -135,20 +141,24 @@ class SequenceToOffsetTable {
       return;
     }
 
-    OS << "\n#ifdef __GNUC__\n"
-       << "#pragma GCC diagnostic push\n"
-       << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"
-       << "#endif\n"
-       << Decl << " = {\n";
+    if (!IsCPP) {
+      OS << "\n#ifdef __GNUC__\n"
+         << "#pragma GCC diagnostic push\n"
+         << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"
+         << "#endif\n";
+    }
+    OS << Decl << " = {\n";
     for (auto I : Seqs) {
       OS << "  /* " << I.second << " */ \"";
       OS.write_escaped(I.first);
       OS << "\\0\"\n";
     }
-    OS << "};\n"
-       << "#ifdef __GNUC__\n"
-       << "#pragma GCC diagnostic pop\n"
-       << "#endif\n\n";
+    OS << "};\n";
+    if (!IsCPP) {
+      OS << "#ifdef __GNUC__\n"
+         << "#pragma GCC diagnostic pop\n"
+         << "#endif\n\n";
+    }
   }
 
   /// emit - Print out the table as the body of an array initializer.
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 5d972157828784..cf0b1bbca8ab3d 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/StringToOffsetTable.h"
 #include "llvm/TableGen/TableGenBackend.h"
 #include <algorithm>
 #include <array>
@@ -637,15 +636,17 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
 
   // Populate the string table with the names of all the builtins after
   // removing this common prefix.
-  StringToOffsetTable Table;
+  SequenceToOffsetTable<StringRef> Table;
   for (const auto &[TargetPrefix, Entry] : BuiltinMap) {
     auto &[Map, CommonPrefix] = Entry;
     for (auto &[BuiltinName, EnumName] : Map) {
       StringRef Suffix = BuiltinName.substr(CommonPrefix->size());
-      Table.GetOrAddStringOffset(Suffix);
+      Table.add(Suffix);
     }
   }
 
+  Table.layout();
+
   OS << formatv(R"(
 // Get the LLVM intrinsic that corresponds to a builtin. This is used by the
 // C front-end. The builtin name is passed in as BuiltinName, and a target
@@ -669,9 +670,8 @@ Intrinsic::getIntrinsicFor{1}Builtin(StringRef TargetPrefix,
   }
 
   if (!Table.empty()) {
-    OS << "  static constexpr char BuiltinNames[] = {\n";
-    Table.EmitCharArray(OS);
-    OS << "  };\n\n";
+    Table.emitStringLiteralDef(OS, "  static constexpr char BuiltinNames[]",
+                               /*IsCPP=*/true);
 
     OS << R"(
   struct BuiltinEntry {
@@ -704,8 +704,8 @@ Intrinsic::getIntrinsicFor{1}Builtin(StringRef TargetPrefix,
                   TargetPrefix);
     for (const auto &[BuiltinName, EnumName] : Map) {
       StringRef Suffix = BuiltinName.substr(CommonPrefix->size());
-      OS << formatv("    {{{0}, {1}}, // {2}\n", EnumName,
-                    *Table.GetStringOffset(Suffix), BuiltinName);
+      OS << formatv("    {{{0}, {1}}, // {2}\n", EnumName, Table.get(Suffix),
+                    BuiltinName);
     }
     OS << formatv("  }; // {0}Names\n\n", TargetPrefix);
   }



More information about the llvm-commits mailing list