[llvm] ce23515 - [tablegen] Emit string literals instead of char arrays
Luke Drummond via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 05:59:59 PST 2020
Author: Luke Drummond
Date: 2020-01-23T13:57:20Z
New Revision: ce23515f5ab01161c98449d833b3ae013b553aa8
URL: https://github.com/llvm/llvm-project/commit/ce23515f5ab01161c98449d833b3ae013b553aa8
DIFF: https://github.com/llvm/llvm-project/commit/ce23515f5ab01161c98449d833b3ae013b553aa8.diff
LOG: [tablegen] Emit string literals instead of char arrays
This changes the generated (Instr|Asm|Reg|Regclass)Name tables from this
form:
extern const char HexagonInstrNameData[] = {
/* 0 */ 'G', '_', 'F', 'L', 'O', 'G', '1', '0', 0,
/* 9 */ 'E', 'N', 'D', 'L', 'O', 'O', 'P', '0', 0,
/* 18 */ 'V', '6', '_', 'v', 'd', 'd', '0', 0,
/* 26 */ 'P', 'S', '_', 'v', 'd', 'd', '0', 0,
[...]
};
...to this:
extern const char HexagonInstrNameData[] = {
/* 0 */ "G_FLOG10\0"
/* 9 */ "ENDLOOP0\0"
/* 18 */ "V6_vdd0\0"
/* 26 */ "PS_vdd0\0"
[...]
};
This should make debugging and exploration a lot easier for mortals,
while providing a significant compile-time reduction for common compilers.
To avoid issues with compilers that have low limits on string-literal
length, this is disabled by default for Visual Studio or when cross-compiling.
To force output one way or the other, pass
`--long-string-literals=<bool>` to `tablegen`.
Reviewers: mstorsjo, rnk
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D73044
Added:
Modified:
llvm/cmake/modules/TableGen.cmake
llvm/utils/TableGen/AsmWriterEmitter.cpp
llvm/utils/TableGen/InstrInfoEmitter.cpp
llvm/utils/TableGen/RegisterInfoEmitter.cpp
llvm/utils/TableGen/SequenceToOffsetTable.h
llvm/utils/TableGen/TableGen.cpp
Removed:
################################################################################
diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
index 9d2fcd9a793c..53d2050c5645 100644
--- a/llvm/cmake/modules/TableGen.cmake
+++ b/llvm/cmake/modules/TableGen.cmake
@@ -58,6 +58,14 @@ function(tablegen project ofn)
endif()
endif()
+ # MSVC can't support long string literals ("long" > 65534 bytes)[1], so if there's
+ # a possibility of generated tables being consumed by MSVC, generate arrays of
+ # char literals, instead. If we're cross-compiling, then conservatively assume
+ # that the source might be consumed by MSVC.
+ # [1] https://docs.microsoft.com/en-us/cpp/cpp/compiler-limits?view=vs-2017
+ if (MSVC)
+ list(APPEND LLVM_TABLEGEN_FLAGS "--long-string-literals=0")
+ endif()
if (CMAKE_GENERATOR MATCHES "Visual Studio")
# Visual Studio has problems with llvm-tblgen's native --write-if-changed
# behavior. Since it doesn't do restat optimizations anyway, just don't
diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp
index 58c0d32d44eb..5f60656c9f56 100644
--- a/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -381,7 +381,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
// Emit the string table itself.
O << " static const char AsmStrs[] = {\n";
- StringTable.emit(O, printChar);
+ StringTable.emit_string_literals(O);
O << " };\n\n";
// Emit the lookup tables in pieces to minimize wasted bytes.
@@ -538,7 +538,7 @@ emitRegisterNameString(raw_ostream &O, StringRef AltName,
StringTable.layout();
O << " static const char AsmStrs" << AltName << "[] = {\n";
- StringTable.emit(O, printChar);
+ StringTable.emit_string_literals(O);
O << " };\n\n";
O << " static const " << getMinimalTypeForRange(StringTable.size() - 1, 32)
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 6ab58bd26a2c..db1822696714 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -570,7 +570,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
// Emit the array of instruction names.
InstrNames.layout();
OS << "extern const char " << TargetName << "InstrNameData[] = {\n";
- InstrNames.emit(OS, printChar);
+ InstrNames.emit_string_literals(OS);
OS << "};\n\n";
OS << "extern const unsigned " << TargetName <<"InstrNameIndices[] = {";
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 2586ec671b2a..eff9a7e13181 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -993,7 +993,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
// Emit the string table.
RegStrings.layout();
OS << "extern const char " << TargetName << "RegStrings[] = {\n";
- RegStrings.emit(OS, printChar);
+ RegStrings.emit_string_literals(OS);
OS << "};\n\n";
OS << "extern const MCRegisterDesc " << TargetName
@@ -1066,7 +1066,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
RegClassStrings.layout();
OS << "extern const char " << TargetName << "RegClassStrings[] = {\n";
- RegClassStrings.emit(OS, printChar);
+ RegClassStrings.emit_string_literals(OS);
OS << "};\n\n";
OS << "extern const MCRegisterClass " << TargetName
diff --git a/llvm/utils/TableGen/SequenceToOffsetTable.h b/llvm/utils/TableGen/SequenceToOffsetTable.h
index 327da39f4774..d8db38e1a95e 100644
--- a/llvm/utils/TableGen/SequenceToOffsetTable.h
+++ b/llvm/utils/TableGen/SequenceToOffsetTable.h
@@ -15,6 +15,7 @@
#ifndef LLVM_UTILS_TABLEGEN_SEQUENCETOOFFSETTABLE_H
#define LLVM_UTILS_TABLEGEN_SEQUENCETOOFFSETTABLE_H
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -23,6 +24,61 @@
#include <map>
namespace llvm {
+extern llvm::cl::opt<bool> EmitLongStrLiterals;
+
+// Helper function for SequenceToOffsetTable<string>.
+static inline void printStrLitEscChar(raw_ostream &OS, char C) {
+ const char *Escapes[] = {
+ "\\000", "\\001", "\\002", "\\003", "\\004", "\\005", "\\006", "\\007",
+ "\\010", "\\t", "\\n", "\\013", "\\014", "\\r", "\\016", "\\017",
+ "\\020", "\\021", "\\022", "\\023", "\\024", "\\025", "\\026", "\\027",
+ "\\030", "\\031", "\\032", "\\033", "\\034", "\\035", "\\036", "\\037",
+ " ", "!", "\\\"", "#", "$", "%", "&", "'",
+ "(", ")", "*", "+", ",", "-", ".", "/",
+ "0", "1", "2", "3", "4", "5", "6", "7",
+ "8", "9", ":", ";", "<", "=", ">", "?",
+ "@", "A", "B", "C", "D", "E", "F", "G",
+ "H", "I", "J", "K", "L", "M", "N", "O",
+ "P", "Q", "R", "S", "T", "U", "V", "W",
+ "X", "Y", "Z", "[", "\\\\", "]", "^", "_",
+ "`", "a", "b", "c", "d", "e", "f", "g",
+ "h", "i", "j", "k", "l", "m", "n", "o",
+ "p", "q", "r", "s", "t", "u", "v", "w",
+ "x", "y", "z", "{", "|", "}", "~", "\\177",
+ "\\200", "\\201", "\\202", "\\203", "\\204", "\\205", "\\206", "\\207",
+ "\\210", "\\211", "\\212", "\\213", "\\214", "\\215", "\\216", "\\217",
+ "\\220", "\\221", "\\222", "\\223", "\\224", "\\225", "\\226", "\\227",
+ "\\230", "\\231", "\\232", "\\233", "\\234", "\\235", "\\236", "\\237",
+ "\\240", "\\241", "\\242", "\\243", "\\244", "\\245", "\\246", "\\247",
+ "\\250", "\\251", "\\252", "\\253", "\\254", "\\255", "\\256", "\\257",
+ "\\260", "\\261", "\\262", "\\263", "\\264", "\\265", "\\266", "\\267",
+ "\\270", "\\271", "\\272", "\\273", "\\274", "\\275", "\\276", "\\277",
+ "\\300", "\\301", "\\302", "\\303", "\\304", "\\305", "\\306", "\\307",
+ "\\310", "\\311", "\\312", "\\313", "\\314", "\\315", "\\316", "\\317",
+ "\\320", "\\321", "\\322", "\\323", "\\324", "\\325", "\\326", "\\327",
+ "\\330", "\\331", "\\332", "\\333", "\\334", "\\335", "\\336", "\\337",
+ "\\340", "\\341", "\\342", "\\343", "\\344", "\\345", "\\346", "\\347",
+ "\\350", "\\351", "\\352", "\\353", "\\354", "\\355", "\\356", "\\357",
+ "\\360", "\\361", "\\362", "\\363", "\\364", "\\365", "\\366", "\\367",
+ "\\370", "\\371", "\\372", "\\373", "\\374", "\\375", "\\376", "\\377"};
+
+ static_assert(sizeof Escapes / sizeof Escapes[0] ==
+ std::numeric_limits<unsigned char>::max() + 1,
+ "unsupported character type");
+ OS << Escapes[static_cast<unsigned char>(C)];
+}
+
+static inline void printChar(raw_ostream &OS, char C) {
+ unsigned char UC(C);
+ if (isalnum(UC) || ispunct(UC)) {
+ OS << '\'';
+ if (C == '\\' || C == '\'')
+ OS << '\\';
+ OS << C << '\'';
+ } else {
+ OS << unsigned(UC);
+ }
+}
/// SequenceToOffsetTable - Collect a number of terminated sequences of T.
/// Compute the layout of a table that contains all the sequences, possibly by
@@ -108,6 +164,24 @@ class SequenceToOffsetTable {
return I->second + (I->first.size() - Seq.size());
}
+ /// `emit_string_literals` - Print out the table as the body of an array
+ /// initializer, where each sequence is written as a C string literal
+ /// terminated by an explicit `\0` escape. If `EmitLongStrLiterals` is
+ /// false, falls back to `emit()` with `printChar` and a "0" terminator.
+ void emit_string_literals(raw_ostream &OS) const {
+ assert(Entries && "Call layout() before emit_string_literals()");
+ if (!EmitLongStrLiterals) {
+ return emit(OS, printChar, "0");
+ }
+ for (auto I : Seqs) {
+ OS << " /* " << I.second << " */ \"";
+ for (auto C: I.first) {
+ printStrLitEscChar(OS, C);
+ }
+ OS << "\\0\"\n";
+ }
+ }
+
/// emit - Print out the table as the body of an array initializer.
/// Use the Print function to print elements.
void emit(raw_ostream &OS,
@@ -127,19 +201,6 @@ class SequenceToOffsetTable {
}
};
-// Helper function for SequenceToOffsetTable<string>.
-static inline void printChar(raw_ostream &OS, char C) {
- unsigned char UC(C);
- if (isalnum(UC) || ispunct(UC)) {
- OS << '\'';
- if (C == '\\' || C == '\'')
- OS << '\\';
- OS << C << '\'';
- } else {
- OS << unsigned(UC);
- }
-}
-
} // end namespace llvm
#endif
diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp
index bdb963c15d32..6da6599eb54e 100644
--- a/llvm/utils/TableGen/TableGen.cpp
+++ b/llvm/utils/TableGen/TableGen.cpp
@@ -60,6 +60,12 @@ namespace llvm {
/// Storage for TimeRegionsOpt as a global so that backends aren't required to
/// include CommandLine.h
bool TimeRegions = false;
+cl::opt<bool> EmitLongStrLiterals(
+ "long-string-literals",
+ cl::desc("when emitting large string tables, prefer string literals over "
+ "comma-separated char literals. This can be a readability and "
+ "compile-time performance win, but upsets some compilers"),
+ cl::Hidden, cl::init(true));
} // end namespace llvm
namespace {
More information about the llvm-commits
mailing list