[llvm] [NFC][TableGen] Emit more readable builtin string table. (PR #105445)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 17:58:35 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-adt
Author: Rahul Joshi (jurahul)
<details>
<summary>Changes</summary>
- Adopt `SequenceToOffsetTable` to emit the string table in `EmitIntrinsicToBuiltinMap`.
- `SequenceToOffsetTable` emits a string table using string literal concatenation of individual null-terminated fragments, one fragment on each line, making the table more readable as well as searchable.
- Allow `StringRef` to be used as the sequence type in `SequenceToOffsetTable` by providing `value_type` and reverse iterators.
- Add an option to `SequenceToOffsetTable::emitStringLiteralDef` to skip generating the GCC pragmas that disable the `-Woverlength-strings` warning when the generated code will be compiled as C++.
- Reduces the string table size for both Clang and MS builtins by several bytes: Clang: 134915 -> 134001 bytes, MS: 68 -> 56 bytes.
---
Full diff: https://github.com/llvm/llvm-project/pull/105445.diff
3 Files Affected:
- (modified) llvm/include/llvm/ADT/StringRef.h (+11)
- (modified) llvm/utils/TableGen/Basic/SequenceToOffsetTable.h (+21-11)
- (modified) llvm/utils/TableGen/IntrinsicEmitter.cpp (+8-8)
``````````diff
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 049f22b03e46e8..32cf0a2218e5e9 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -17,6 +17,7 @@
#include <cassert>
#include <cstddef>
#include <cstring>
+#include <iterator>
#include <limits>
#include <string>
#include <string_view>
@@ -54,6 +55,8 @@ namespace llvm {
using iterator = const char *;
using const_iterator = const char *;
using size_type = size_t;
+ using value_type = char;
+ using reverse_iterator = std::reverse_iterator<iterator>;
private:
/// The start of the string, in an external buffer.
@@ -112,6 +115,14 @@ namespace llvm {
iterator end() const { return Data + Length; }
+ reverse_iterator rbegin() const {
+ return std::make_reverse_iterator(end());
+ }
+
+ reverse_iterator rend() const {
+ return std::make_reverse_iterator(begin());
+ }
+
const unsigned char *bytes_begin() const {
return reinterpret_cast<const unsigned char *>(begin());
}
diff --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
index 09100b39650d81..141f91c14c5c9e 100644
--- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
+++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
@@ -125,8 +125,14 @@ class SequenceToOffsetTable {
/// `emitStringLiteralDef` - Print out the table as the body of an array
/// initializer, where each element is a C string literal terminated by
/// `\0`. Falls back to emitting a comma-separated integer list if
- /// `EmitLongStrLiterals` is false
- void emitStringLiteralDef(raw_ostream &OS, const llvm::Twine &Decl) const {
+ /// `EmitLongStrLiterals` is false.
+ ///
+ /// Per https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html, the warning
+ /// `-Woverlength-strings`, which could be triggered by the generated code, is
+ /// not applicable for C++. So if \p IsCPP is true, we skip generating the GCC
+ /// pragmas to ignore this warning.
+ void emitStringLiteralDef(raw_ostream &OS, const llvm::Twine &Decl,
+ bool IsCPP = false) const {
assert(Entries && "Call layout() before emitStringLiteralDef()");
if (!EmitLongStrLiterals) {
OS << Decl << " = {\n";
@@ -135,20 +141,24 @@ class SequenceToOffsetTable {
return;
}
- OS << "\n#ifdef __GNUC__\n"
- << "#pragma GCC diagnostic push\n"
- << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"
- << "#endif\n"
- << Decl << " = {\n";
+ if (!IsCPP) {
+ OS << "\n#ifdef __GNUC__\n"
+ << "#pragma GCC diagnostic push\n"
+ << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"
+ << "#endif\n";
+ }
+ OS << Decl << " = {\n";
for (auto I : Seqs) {
OS << " /* " << I.second << " */ \"";
OS.write_escaped(I.first);
OS << "\\0\"\n";
}
- OS << "};\n"
- << "#ifdef __GNUC__\n"
- << "#pragma GCC diagnostic pop\n"
- << "#endif\n\n";
+ OS << "};\n";
+ if (!IsCPP) {
+ OS << "#ifdef __GNUC__\n"
+ << "#pragma GCC diagnostic pop\n"
+ << "#endif\n\n";
+ }
}
/// emit - Print out the table as the body of an array initializer.
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 5d972157828784..cf0b1bbca8ab3d 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
#include <array>
@@ -637,15 +636,17 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
// Populate the string table with the names of all the builtins after
// removing this common prefix.
- StringToOffsetTable Table;
+ SequenceToOffsetTable<StringRef> Table;
for (const auto &[TargetPrefix, Entry] : BuiltinMap) {
auto &[Map, CommonPrefix] = Entry;
for (auto &[BuiltinName, EnumName] : Map) {
StringRef Suffix = BuiltinName.substr(CommonPrefix->size());
- Table.GetOrAddStringOffset(Suffix);
+ Table.add(Suffix);
}
}
+ Table.layout();
+
OS << formatv(R"(
// Get the LLVM intrinsic that corresponds to a builtin. This is used by the
// C front-end. The builtin name is passed in as BuiltinName, and a target
@@ -669,9 +670,8 @@ Intrinsic::getIntrinsicFor{1}Builtin(StringRef TargetPrefix,
}
if (!Table.empty()) {
- OS << " static constexpr char BuiltinNames[] = {\n";
- Table.EmitCharArray(OS);
- OS << " };\n\n";
+ Table.emitStringLiteralDef(OS, " static constexpr char BuiltinNames[]",
+ /*IsCPP=*/true);
OS << R"(
struct BuiltinEntry {
@@ -704,8 +704,8 @@ Intrinsic::getIntrinsicFor{1}Builtin(StringRef TargetPrefix,
TargetPrefix);
for (const auto &[BuiltinName, EnumName] : Map) {
StringRef Suffix = BuiltinName.substr(CommonPrefix->size());
- OS << formatv(" {{{0}, {1}}, // {2}\n", EnumName,
- *Table.GetStringOffset(Suffix), BuiltinName);
+ OS << formatv(" {{{0}, {1}}, // {2}\n", EnumName, Table.get(Suffix),
+ BuiltinName);
}
OS << formatv(" }; // {0}Names\n\n", TargetPrefix);
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/105445
More information about the llvm-commits
mailing list