[llvm] [StrTable] Use string literal emission for intrinsics on non-MSVC platforms (PR #124856)
Reid Kleckner via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 28 15:21:53 PST 2025
https://github.com/rnk updated https://github.com/llvm/llvm-project/pull/124856
>From c63fe050475e1eb2982bc590ecb74180a0d99086 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk at google.com>
Date: Tue, 28 Jan 2025 22:29:31 +0000
Subject: [PATCH 1/2] [StrTable] Use string literal emission for non-MSVC
platforms
This mainly transitions the LLVM intrinsic string table from character
emission to string literal emission.
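Add the FormatVariadic.h includes that are now missing in
AsmMatcherEmitter.cpp and SDNodeInfoEmitter.cpp, to account for moving that
include out of StringToOffsetTable.h and into the new StringToOffsetTable.cpp.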
---
.../llvm/TableGen/StringToOffsetTable.h | 104 +-------------
llvm/lib/TableGen/CMakeLists.txt | 1 +
llvm/lib/TableGen/StringToOffsetTable.cpp | 129 ++++++++++++++++++
llvm/utils/TableGen/AsmMatcherEmitter.cpp | 1 +
llvm/utils/TableGen/Basic/TableGen.cpp | 9 --
llvm/utils/TableGen/SDNodeInfoEmitter.cpp | 1 +
.../gn/secondary/llvm/lib/TableGen/BUILD.gn | 1 +
7 files changed, 136 insertions(+), 110 deletions(-)
create mode 100644 llvm/lib/TableGen/StringToOffsetTable.cpp
diff --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h
index e716411514bd63..21795644d4bd67 100644
--- a/llvm/include/llvm/TableGen/StringToOffsetTable.h
+++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h
@@ -12,8 +12,6 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/raw_ostream.h"
#include <optional>
namespace llvm {
@@ -36,17 +34,7 @@ class StringToOffsetTable {
bool empty() const { return StringOffset.empty(); }
size_t size() const { return AggregateString.size(); }
- unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
- auto [II, Inserted] = StringOffset.insert({Str, size()});
- if (Inserted) {
- // Add the string to the aggregate if this is the first time found.
- AggregateString.append(Str.begin(), Str.end());
- if (appendZero)
- AggregateString += '\0';
- }
-
- return II->second;
- }
+ unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true);
// Returns the offset of `Str` in the table if its preset, else return
// std::nullopt.
@@ -69,96 +57,10 @@ class StringToOffsetTable {
// `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be
// valid identifiers to declare.
void EmitStringTableDef(raw_ostream &OS, const Twine &Name,
- const Twine &Indent = "") const {
- OS << formatv(R"(
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Woverlength-strings"
-#endif
-{0}static constexpr char {1}Storage[] = )",
- Indent, Name);
-
- // MSVC silently miscompiles string literals longer than 64k in some
- // circumstances. When the string table is longer, emit it as an array of
- // character literals.
- bool UseChars = AggregateString.size() > (64 * 1024);
- OS << (UseChars ? "{\n" : "\n");
-
- llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n");
- llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0'));
- // We should always have an empty string at the start, and because these are
- // null terminators rather than separators, we'll have one at the end as
- // well. Skip the end one.
- assert(Strings.front().empty() && "Expected empty initial string!");
- assert(Strings.back().empty() &&
- "Expected empty string at the end due to terminators!");
- Strings.pop_back();
- for (StringRef Str : Strings) {
- OS << LineSep << Indent << " ";
- // If we can, just emit this as a string literal to be concatenated.
- if (!UseChars) {
- OS << "\"";
- OS.write_escaped(Str);
- OS << "\\0\"";
- continue;
- }
-
- llvm::ListSeparator CharSep(", ");
- for (char C : Str) {
- OS << CharSep << "'";
- OS.write_escaped(StringRef(&C, 1));
- OS << "'";
- }
- OS << CharSep << "'\\0'";
- }
- OS << LineSep << Indent << (UseChars ? "};" : " ;");
-
- OS << formatv(R"(
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
-
-{0}static constexpr llvm::StringTable {1} =
-{0} {1}Storage;
-)",
- Indent, Name);
- }
+ const Twine &Indent = "") const;
// Emit the string as one single string.
- void EmitString(raw_ostream &O) const {
- // Escape the string.
- SmallString<256> EscapedStr;
- raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
-
- O << " \"";
- unsigned CharsPrinted = 0;
- for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
- if (CharsPrinted > 70) {
- O << "\"\n \"";
- CharsPrinted = 0;
- }
- O << EscapedStr[i];
- ++CharsPrinted;
-
- // Print escape sequences all together.
- if (EscapedStr[i] != '\\')
- continue;
-
- assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
- if (isDigit(EscapedStr[i + 1])) {
- assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
- "Expected 3 digit octal escape!");
- O << EscapedStr[++i];
- O << EscapedStr[++i];
- O << EscapedStr[++i];
- CharsPrinted += 3;
- } else {
- O << EscapedStr[++i];
- ++CharsPrinted;
- }
- }
- O << "\"";
- }
+ void EmitString(raw_ostream &O) const;
};
} // end namespace llvm
diff --git a/llvm/lib/TableGen/CMakeLists.txt b/llvm/lib/TableGen/CMakeLists.txt
index 84815c77369979..0f9284c8bb999d 100644
--- a/llvm/lib/TableGen/CMakeLists.txt
+++ b/llvm/lib/TableGen/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMTableGen
Record.cpp
SetTheory.cpp
StringMatcher.cpp
+ StringToOffsetTable.cpp
TableGenBackend.cpp
TableGenBackendSkeleton.cpp
TGLexer.cpp
diff --git a/llvm/lib/TableGen/StringToOffsetTable.cpp b/llvm/lib/TableGen/StringToOffsetTable.cpp
new file mode 100644
index 00000000000000..6de889e1a10ee3
--- /dev/null
+++ b/llvm/lib/TableGen/StringToOffsetTable.cpp
@@ -0,0 +1,129 @@
+//===- StringToOffsetTable.h - Emit a big concatenated string ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/StringToOffsetTable.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace llvm {
+cl::opt<bool> EmitLongStrLiterals(
+ "long-string-literals",
+ cl::desc("when emitting large string tables, prefer string literals over "
+ "comma-separated char literals. This can be a readability and "
+ "compile-time performance win, but upsets some compilers"),
+ cl::Hidden, cl::init(true));
+} // end namespace llvm
+
+
+unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str,
+ bool appendZero) {
+ auto [II, Inserted] = StringOffset.insert({Str, size()});
+ if (Inserted) {
+ // Add the string to the aggregate if this is the first time found.
+ AggregateString.append(Str.begin(), Str.end());
+ if (appendZero)
+ AggregateString += '\0';
+ }
+
+ return II->second;
+}
+
+void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
+ const Twine &Indent) const {
+ OS << formatv(R"(
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Woverlength-strings"
+#endif
+{0}static constexpr char {1}Storage[] = )",
+ Indent, Name);
+
+ // MSVC silently miscompiles string literals longer than 64k in some
+ // circumstances. In this case, the build system sets EmitLongStrLiterals to
+ // false. When that option is false and the string table is longer than 64k,
+ // emit it as an array of character literals.
+ bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024);
+ OS << (UseChars ? "{\n" : "\n");
+
+ llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n");
+ llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0'));
+ // We should always have an empty string at the start, and because these are
+ // null terminators rather than separators, we'll have one at the end as
+ // well. Skip the end one.
+ assert(Strings.front().empty() && "Expected empty initial string!");
+ assert(Strings.back().empty() &&
+ "Expected empty string at the end due to terminators!");
+ Strings.pop_back();
+ for (StringRef Str : Strings) {
+ OS << LineSep << Indent << " ";
+ // If we can, just emit this as a string literal to be concatenated.
+ if (!UseChars) {
+ OS << "\"";
+ OS.write_escaped(Str);
+ OS << "\\0\"";
+ continue;
+ }
+
+ llvm::ListSeparator CharSep(", ");
+ for (char C : Str) {
+ OS << CharSep << "'";
+ OS.write_escaped(StringRef(&C, 1));
+ OS << "'";
+ }
+ OS << CharSep << "'\\0'";
+ }
+ OS << LineSep << Indent << (UseChars ? "};" : " ;");
+
+ OS << formatv(R"(
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+{0}static constexpr llvm::StringTable {1} =
+{0} {1}Storage;
+)",
+ Indent, Name);
+}
+
+void StringToOffsetTable::EmitString(raw_ostream &O) const {
+ // Escape the string.
+ SmallString<256> EscapedStr;
+ raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
+
+ O << " \"";
+ unsigned CharsPrinted = 0;
+ for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
+ if (CharsPrinted > 70) {
+ O << "\"\n \"";
+ CharsPrinted = 0;
+ }
+ O << EscapedStr[i];
+ ++CharsPrinted;
+
+ // Print escape sequences all together.
+ if (EscapedStr[i] != '\\')
+ continue;
+
+ assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
+ if (isDigit(EscapedStr[i + 1])) {
+ assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
+ "Expected 3 digit octal escape!");
+ O << EscapedStr[++i];
+ O << EscapedStr[++i];
+ O << EscapedStr[++i];
+ CharsPrinted += 3;
+ } else {
+ O << EscapedStr[++i];
+ ++CharsPrinted;
+ }
+ }
+ O << "\"";
+}
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 7684387d80fe24..0f2e20eb37649b 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -110,6 +110,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/StringMatcher.h"
diff --git a/llvm/utils/TableGen/Basic/TableGen.cpp b/llvm/utils/TableGen/Basic/TableGen.cpp
index 80ac93f2b54fb6..edb7791500699f 100644
--- a/llvm/utils/TableGen/Basic/TableGen.cpp
+++ b/llvm/utils/TableGen/Basic/TableGen.cpp
@@ -26,15 +26,6 @@
using namespace llvm;
-namespace llvm {
-cl::opt<bool> EmitLongStrLiterals(
- "long-string-literals",
- cl::desc("when emitting large string tables, prefer string literals over "
- "comma-separated char literals. This can be a readability and "
- "compile-time performance win, but upsets some compilers"),
- cl::Hidden, cl::init(true));
-} // end namespace llvm
-
static cl::OptionCategory PrintEnumsCat("Options for -print-enums");
static cl::opt<std::string> Class("class",
cl::desc("Print Enum list for this class"),
diff --git a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp
index 63ee0deb871109..64f03dae83e7de 100644
--- a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp
+++ b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp
@@ -9,6 +9,7 @@
#include "Basic/SequenceToOffsetTable.h"
#include "Common/CodeGenDAGPatterns.h" // For SDNodeInfo.
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/TableGen/TableGenBackend.h"
diff --git a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn
index d90df7bc0e57a5..b40fdf154b01a2 100644
--- a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn
@@ -10,6 +10,7 @@ static_library("TableGen") {
"Record.cpp",
"SetTheory.cpp",
"StringMatcher.cpp",
+ "StringToOffsetTable.cpp",
"TGLexer.cpp",
"TGParser.cpp",
"TGTimer.cpp",
>From 1761ecd8e0f2c8260ad7acb0483549e773705acc Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk at google.com>
Date: Tue, 28 Jan 2025 23:19:44 +0000
Subject: [PATCH 2/2] fix copy paste and update comment
---
llvm/lib/TableGen/StringToOffsetTable.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/TableGen/StringToOffsetTable.cpp b/llvm/lib/TableGen/StringToOffsetTable.cpp
index 6de889e1a10ee3..eb6455e0f0251c 100644
--- a/llvm/lib/TableGen/StringToOffsetTable.cpp
+++ b/llvm/lib/TableGen/StringToOffsetTable.cpp
@@ -1,4 +1,4 @@
-//===- StringToOffsetTable.h - Emit a big concatenated string ---*- C++ -*-===//
+//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -22,7 +22,6 @@ cl::opt<bool> EmitLongStrLiterals(
cl::Hidden, cl::init(true));
} // end namespace llvm
-
unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str,
bool appendZero) {
auto [II, Inserted] = StringOffset.insert({Str, size()});
@@ -47,9 +46,10 @@ void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
Indent, Name);
// MSVC silently miscompiles string literals longer than 64k in some
- // circumstances. In this case, the build system sets EmitLongStrLiterals to
- // false. When that option is false and the string table is longer than 64k,
- // emit it as an array of character literals.
+ // circumstances. The build system sets EmitLongStrLiterals to false when it
+ // detects that it is targeting MSVC. When that option is false and the
+ // string table is longer than 64k, emit it as an array of character
+ // literals.
bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024);
OS << (UseChars ? "{\n" : "\n");
More information about the llvm-commits
mailing list