[llvm] 0a27c4e - [StrTable] Use string literal emission for intrinsics on non-MSVC platforms (#124856)

via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 13 08:58:56 PDT 2025


Author: Reid Kleckner
Date: 2025-04-13T17:58:53+02:00
New Revision: 0a27c4e318e778b520306a9e2102e03023cfaa33

URL: https://github.com/llvm/llvm-project/commit/0a27c4e318e778b520306a9e2102e03023cfaa33
DIFF: https://github.com/llvm/llvm-project/commit/0a27c4e318e778b520306a9e2102e03023cfaa33.diff

LOG: [StrTable] Use string literal emission for intrinsics on non-MSVC platforms (#124856)

This mainly transitions the LLVM intrinsic string table from character
literal emission to string literal emission on non-MSVC platforms; I
confirmed locally that the table is now emitted as string literals.

I moved the guts of StringToOffsetTable to a cpp file so I could move
the `EmitLongStrLiterals` cl::opt global to a non-vague linkage home in
the `TableGen` library. I had to add missing FormatVariadic.h includes
to account for moving other includes to a cpp file.

Added: 
    llvm/lib/TableGen/StringToOffsetTable.cpp

Modified: 
    llvm/cmake/modules/TableGen.cmake
    llvm/include/llvm/TableGen/Main.h
    llvm/include/llvm/TableGen/StringToOffsetTable.h
    llvm/lib/TableGen/CMakeLists.txt
    llvm/lib/TableGen/Main.cpp
    llvm/utils/TableGen/AsmMatcherEmitter.cpp
    llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
    llvm/utils/TableGen/Basic/TableGen.cpp
    llvm/utils/TableGen/SDNodeInfoEmitter.cpp
    llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn

Removed: 
    


################################################################################
diff  --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
index ffcc718b47775..bf914c379e809 100644
--- a/llvm/cmake/modules/TableGen.cmake
+++ b/llvm/cmake/modules/TableGen.cmake
@@ -68,7 +68,9 @@ function(tablegen project ofn)
   # char literals, instead. If we're cross-compiling, then conservatively assume
   # that the source might be consumed by MSVC.
   # [1] https://docs.microsoft.com/en-us/cpp/cpp/compiler-limits?view=vs-2017
-  if (MSVC AND project STREQUAL LLVM)
+  # Don't pass this flag to mlir-src-sharder, since it doesn't support the
+  # flag, and it doesn't need it.
+  if (MSVC AND NOT "${project}" STREQUAL "MLIR_SRC_SHARDER")
     list(APPEND LLVM_TABLEGEN_FLAGS "--long-string-literals=0")
   endif()
   if (CMAKE_GENERATOR MATCHES "Visual Studio")

diff  --git a/llvm/include/llvm/TableGen/Main.h b/llvm/include/llvm/TableGen/Main.h
index e8c60e2869902..5f68be188de78 100644
--- a/llvm/include/llvm/TableGen/Main.h
+++ b/llvm/include/llvm/TableGen/Main.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_TABLEGEN_MAIN_H
 #define LLVM_TABLEGEN_MAIN_H
 
+#include "llvm/Support/CommandLine.h"
 #include <functional>
 
 namespace llvm {
@@ -27,6 +28,10 @@ using TableGenMainFn = bool(raw_ostream &OS, const RecordKeeper &Records);
 int TableGenMain(const char *argv0,
                  std::function<TableGenMainFn> MainFn = nullptr);
 
+/// Controls whether large string tables are emitted as string literals or as
+/// arrays of char literals. Typically set to false when building with MSVC.
+extern cl::opt<bool> EmitLongStrLiterals;
+
 } // end namespace llvm
 
 #endif // LLVM_TABLEGEN_MAIN_H

diff  --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h
index e716411514bd6..21795644d4bd6 100644
--- a/llvm/include/llvm/TableGen/StringToOffsetTable.h
+++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h
@@ -12,8 +12,6 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/raw_ostream.h"
 #include <optional>
 
 namespace llvm {
@@ -36,17 +34,7 @@ class StringToOffsetTable {
   bool empty() const { return StringOffset.empty(); }
   size_t size() const { return AggregateString.size(); }
 
-  unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
-    auto [II, Inserted] = StringOffset.insert({Str, size()});
-    if (Inserted) {
-      // Add the string to the aggregate if this is the first time found.
-      AggregateString.append(Str.begin(), Str.end());
-      if (appendZero)
-        AggregateString += '\0';
-    }
-
-    return II->second;
-  }
+  unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true);
 
   // Returns the offset of `Str` in the table if its preset, else return
   // std::nullopt.
@@ -69,96 +57,10 @@ class StringToOffsetTable {
   // `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be
   // valid identifiers to declare.
   void EmitStringTableDef(raw_ostream &OS, const Twine &Name,
-                          const Twine &Indent = "") const {
-    OS << formatv(R"(
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Woverlength-strings"
-#endif
-{0}static constexpr char {1}Storage[] = )",
-                  Indent, Name);
-
-    // MSVC silently miscompiles string literals longer than 64k in some
-    // circumstances. When the string table is longer, emit it as an array of
-    // character literals.
-    bool UseChars = AggregateString.size() > (64 * 1024);
-    OS << (UseChars ? "{\n" : "\n");
-
-    llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n");
-    llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0'));
-    // We should always have an empty string at the start, and because these are
-    // null terminators rather than separators, we'll have one at the end as
-    // well. Skip the end one.
-    assert(Strings.front().empty() && "Expected empty initial string!");
-    assert(Strings.back().empty() &&
-           "Expected empty string at the end due to terminators!");
-    Strings.pop_back();
-    for (StringRef Str : Strings) {
-      OS << LineSep << Indent << "  ";
-      // If we can, just emit this as a string literal to be concatenated.
-      if (!UseChars) {
-        OS << "\"";
-        OS.write_escaped(Str);
-        OS << "\\0\"";
-        continue;
-      }
-
-      llvm::ListSeparator CharSep(", ");
-      for (char C : Str) {
-        OS << CharSep << "'";
-        OS.write_escaped(StringRef(&C, 1));
-        OS << "'";
-      }
-      OS << CharSep << "'\\0'";
-    }
-    OS << LineSep << Indent << (UseChars ? "};" : "  ;");
-
-    OS << formatv(R"(
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
-
-{0}static constexpr llvm::StringTable {1} =
-{0}    {1}Storage;
-)",
-                  Indent, Name);
-  }
+                          const Twine &Indent = "") const;
 
   // Emit the string as one single string.
-  void EmitString(raw_ostream &O) const {
-    // Escape the string.
-    SmallString<256> EscapedStr;
-    raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
-
-    O << "    \"";
-    unsigned CharsPrinted = 0;
-    for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
-      if (CharsPrinted > 70) {
-        O << "\"\n    \"";
-        CharsPrinted = 0;
-      }
-      O << EscapedStr[i];
-      ++CharsPrinted;
-
-      // Print escape sequences all together.
-      if (EscapedStr[i] != '\\')
-        continue;
-
-      assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
-      if (isDigit(EscapedStr[i + 1])) {
-        assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
-               "Expected 3 digit octal escape!");
-        O << EscapedStr[++i];
-        O << EscapedStr[++i];
-        O << EscapedStr[++i];
-        CharsPrinted += 3;
-      } else {
-        O << EscapedStr[++i];
-        ++CharsPrinted;
-      }
-    }
-    O << "\"";
-  }
+  void EmitString(raw_ostream &O) const;
 };
 
 } // end namespace llvm

diff  --git a/llvm/lib/TableGen/CMakeLists.txt b/llvm/lib/TableGen/CMakeLists.txt
index 84815c7736997..0f9284c8bb999 100644
--- a/llvm/lib/TableGen/CMakeLists.txt
+++ b/llvm/lib/TableGen/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMTableGen
   Record.cpp
   SetTheory.cpp
   StringMatcher.cpp
+  StringToOffsetTable.cpp
   TableGenBackend.cpp
   TableGenBackendSkeleton.cpp
   TGLexer.cpp

diff  --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 35600bf2f1f86..ea716215e0679 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -64,6 +64,15 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
 static cl::opt<bool>
 TimePhases("time-phases", cl::desc("Time phases of parser and backend"));
 
+namespace llvm {
+cl::opt<bool> EmitLongStrLiterals(
+    "long-string-literals",
+    cl::desc("when emitting large string tables, prefer string literals over "
+             "comma-separated char literals. This can be a readability and "
+             "compile-time performance win, but upsets some compilers"),
+    cl::Hidden, cl::init(true));
+} // end namespace llvm
+
 static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
     "no-warn-on-unused-template-args",
     cl::desc("Disable unused template argument warnings."));

diff  --git a/llvm/lib/TableGen/StringToOffsetTable.cpp b/llvm/lib/TableGen/StringToOffsetTable.cpp
new file mode 100644
index 0000000000000..d73b5749ad7d5
--- /dev/null
+++ b/llvm/lib/TableGen/StringToOffsetTable.cpp
@@ -0,0 +1,120 @@
+//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/StringToOffsetTable.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Main.h"
+
+using namespace llvm;
+
+unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str,
+                                                   bool appendZero) {
+  auto [II, Inserted] = StringOffset.insert({Str, size()});
+  if (Inserted) {
+    // Add the string to the aggregate if this is the first time found.
+    AggregateString.append(Str.begin(), Str.end());
+    if (appendZero)
+      AggregateString += '\0';
+  }
+
+  return II->second;
+}
+
+void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
+                                             const Twine &Indent) const {
+  OS << formatv(R"(
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Woverlength-strings"
+#endif
+{0}static constexpr char {1}Storage[] = )",
+                Indent, Name);
+
+  // MSVC silently miscompiles string literals longer than 64k in some
+  // circumstances. The build system sets EmitLongStrLiterals to false when it
+  // detects that it is targeting MSVC. When that option is false and the
+  // string table is longer than 64k, emit it as an array of character
+  // literals.
+  bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024);
+  OS << (UseChars ? "{\n" : "\n");
+
+  llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n");
+  llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0'));
+  // We should always have an empty string at the start, and because these are
+  // null terminators rather than separators, we'll have one at the end as
+  // well. Skip the end one.
+  assert(Strings.front().empty() && "Expected empty initial string!");
+  assert(Strings.back().empty() &&
+         "Expected empty string at the end due to terminators!");
+  Strings.pop_back();
+  for (StringRef Str : Strings) {
+    OS << LineSep << Indent << "  ";
+    // If we can, just emit this as a string literal to be concatenated.
+    if (!UseChars) {
+      OS << "\"";
+      OS.write_escaped(Str);
+      OS << "\\0\"";
+      continue;
+    }
+
+    llvm::ListSeparator CharSep(", ");
+    for (char C : Str) {
+      OS << CharSep << "'";
+      OS.write_escaped(StringRef(&C, 1));
+      OS << "'";
+    }
+    OS << CharSep << "'\\0'";
+  }
+  OS << LineSep << Indent << (UseChars ? "};" : "  ;");
+
+  OS << formatv(R"(
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+{0}static constexpr llvm::StringTable {1} =
+{0}    {1}Storage;
+)",
+                Indent, Name);
+}
+
+void StringToOffsetTable::EmitString(raw_ostream &O) const {
+  // Escape the string.
+  SmallString<256> EscapedStr;
+  raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
+
+  O << "    \"";
+  unsigned CharsPrinted = 0;
+  for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
+    if (CharsPrinted > 70) {
+      O << "\"\n    \"";
+      CharsPrinted = 0;
+    }
+    O << EscapedStr[i];
+    ++CharsPrinted;
+
+    // Print escape sequences all together.
+    if (EscapedStr[i] != '\\')
+      continue;
+
+    assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
+    if (isDigit(EscapedStr[i + 1])) {
+      assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
+             "Expected 3 digit octal escape!");
+      O << EscapedStr[++i];
+      O << EscapedStr[++i];
+      O << EscapedStr[++i];
+      CharsPrinted += 3;
+    } else {
+      O << EscapedStr[++i];
+      ++CharsPrinted;
+    }
+  }
+  O << "\"";
+}

diff  --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 24822c847046d..c954163cdeb3a 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -110,6 +110,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/StringMatcher.h"

diff  --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
index 35a9abdc37c82..8da6fbef0672e 100644
--- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
+++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
@@ -15,15 +15,14 @@
 #define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
 
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Main.h"
 #include <algorithm>
 #include <cassert>
 #include <functional>
 #include <map>
 
 namespace llvm {
-extern cl::opt<bool> EmitLongStrLiterals;
 
 inline void printChar(raw_ostream &OS, char C) {
   unsigned char UC(C);

diff  --git a/llvm/utils/TableGen/Basic/TableGen.cpp b/llvm/utils/TableGen/Basic/TableGen.cpp
index 80ac93f2b54fb..edb7791500699 100644
--- a/llvm/utils/TableGen/Basic/TableGen.cpp
+++ b/llvm/utils/TableGen/Basic/TableGen.cpp
@@ -26,15 +26,6 @@
 
 using namespace llvm;
 
-namespace llvm {
-cl::opt<bool> EmitLongStrLiterals(
-    "long-string-literals",
-    cl::desc("when emitting large string tables, prefer string literals over "
-             "comma-separated char literals. This can be a readability and "
-             "compile-time performance win, but upsets some compilers"),
-    cl::Hidden, cl::init(true));
-} // end namespace llvm
-
 static cl::OptionCategory PrintEnumsCat("Options for -print-enums");
 static cl::opt<std::string> Class("class",
                                   cl::desc("Print Enum list for this class"),

diff  --git a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp
index 63ee0deb87110..64f03dae83e7d 100644
--- a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp
+++ b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp
@@ -9,6 +9,7 @@
 #include "Basic/SequenceToOffsetTable.h"
 #include "Common/CodeGenDAGPatterns.h" // For SDNodeInfo.
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/StringToOffsetTable.h"
 #include "llvm/TableGen/TableGenBackend.h"

diff  --git a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn
index d90df7bc0e57a..b40fdf154b01a 100644
--- a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn
@@ -10,6 +10,7 @@ static_library("TableGen") {
     "Record.cpp",
     "SetTheory.cpp",
     "StringMatcher.cpp",
+    "StringToOffsetTable.cpp",
     "TGLexer.cpp",
     "TGParser.cpp",
     "TGTimer.cpp",


        


More information about the llvm-commits mailing list