[llvm] ce23515 - [tablegen] Emit string literals instead of char arrays
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 07:19:32 PST 2020
Hi,
I'm getting this warning now:
In file included from /w/src/llvm.org/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp:44:
/w/bld/org/lib/Target/X86/X86GenInstrInfo.inc:32952:11: warning: string literal of length 209447 exceeds maximum length 65536 that C++ compilers are required to support [-Woverlength-strings]
/* 0 */ "G_FLOG10\0"
^~~~~~~~~~~~
--
Krzysztof Parzyszek kparzysz at quicinc.com AI tools development
> -----Original Message-----
> From: llvm-commits <llvm-commits-bounces at lists.llvm.org> On Behalf Of Luke
> Drummond via llvm-commits
> Sent: Thursday, January 23, 2020 8:00 AM
> To: llvm-commits at lists.llvm.org
> Subject: [EXT] [llvm] ce23515 - [tablegen] Emit string literals instead of
> char arrays
>
>
> Author: Luke Drummond
> Date: 2020-01-23T13:57:20Z
> New Revision: ce23515f5ab01161c98449d833b3ae013b553aa8
>
> URL: https://github.com/llvm/llvm-project/commit/ce23515f5ab01161c98449d833b3ae013b553aa8
> DIFF: https://github.com/llvm/llvm-project/commit/ce23515f5ab01161c98449d833b3ae013b553aa8.diff
>
> LOG: [tablegen] Emit string literals instead of char arrays
>
> This changes the generated (Instr|Asm|Reg|Regclass)Name tables from this
> form:
> extern const char HexagonInstrNameData[] = {
> /* 0 */ 'G', '_', 'F', 'L', 'O', 'G', '1', '0', 0,
> /* 9 */ 'E', 'N', 'D', 'L', 'O', 'O', 'P', '0', 0,
> /* 18 */ 'V', '6', '_', 'v', 'd', 'd', '0', 0,
> /* 26 */ 'P', 'S', '_', 'v', 'd', 'd', '0', 0,
> [...]
> };
>
> ...to this:
>
> extern const char HexagonInstrNameData[] = {
> /* 0 */ "G_FLOG10\0"
> /* 9 */ "ENDLOOP0\0"
> /* 18 */ "V6_vdd0\0"
> /* 26 */ "PS_vdd0\0"
> [...]
> };
>
> This should make debugging and exploration a lot easier for mortals, while
> providing a significant compile-time reduction for common compilers.
>
> To avoid issues with low implementation limits, this is disabled by default
> for visual studio or when cross-compiling.
>
> To force output one way or the other, pass `--long-string-literals=<bool>` to
> `tablegen`
>
> Reviewers: mstorsjo, rnk
>
> Subscribers: llvm-commit
>
> Differential Revision: https://reviews.llvm.org/D73044
>
> Added:
>
>
> Modified:
> llvm/cmake/modules/TableGen.cmake
> llvm/utils/TableGen/AsmWriterEmitter.cpp
> llvm/utils/TableGen/InstrInfoEmitter.cpp
> llvm/utils/TableGen/RegisterInfoEmitter.cpp
> llvm/utils/TableGen/SequenceToOffsetTable.h
> llvm/utils/TableGen/TableGen.cpp
>
> Removed:
>
>
>
> #############################################################################
> ###
> diff --git a/llvm/cmake/modules/TableGen.cmake
> b/llvm/cmake/modules/TableGen.cmake
> index 9d2fcd9a793c..53d2050c5645 100644
> --- a/llvm/cmake/modules/TableGen.cmake
> +++ b/llvm/cmake/modules/TableGen.cmake
> @@ -58,6 +58,14 @@ function(tablegen project ofn)
> endif()
> endif()
>
> + # MSVC can't support long string literals ("long" > 65534 bytes)[1], so if
> + # there's a possibility of generated tables being consumed by MSVC, generate
> + # arrays of char literals, instead. If we're cross-compiling, then
> + # conservatively assume that the source might be consumed by MSVC.
> + # [1] https://docs.microsoft.com/en-us/cpp/cpp/compiler-limits?view=vs-2017
> + if (MSVC)
> + list(APPEND LLVM_TABLEGEN_FLAGS "--long-string-literals=0")
> + endif()
> if (CMAKE_GENERATOR MATCHES "Visual Studio")
> # Visual Studio has problems with llvm-tblgen's native --write-if-changed
> # behavior. Since it doesn't do restat optimizations anyway, just don't
>
> diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp
> b/llvm/utils/TableGen/AsmWriterEmitter.cpp
> index 58c0d32d44eb..5f60656c9f56 100644
> --- a/llvm/utils/TableGen/AsmWriterEmitter.cpp
> +++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp
> @@ -381,7 +381,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream
> &O) {
>
> // Emit the string table itself.
> O << " static const char AsmStrs[] = {\n";
> - StringTable.emit(O, printChar);
> + StringTable.emit_string_literals(O);
> O << " };\n\n";
>
> // Emit the lookup tables in pieces to minimize wasted bytes.
> @@ -538,7 +538,7 @@ emitRegisterNameString(raw_ostream &O, StringRef AltName,
>
> StringTable.layout();
> O << " static const char AsmStrs" << AltName << "[] = {\n";
> - StringTable.emit(O, printChar);
> + StringTable.emit_string_literals(O);
> O << " };\n\n";
>
> O << " static const " << getMinimalTypeForRange(StringTable.size() - 1,
> 32)
>
> diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp
> b/llvm/utils/TableGen/InstrInfoEmitter.cpp
> index 6ab58bd26a2c..db1822696714 100644
> --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
> +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
> @@ -570,7 +570,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
> // Emit the array of instruction names.
> InstrNames.layout();
> OS << "extern const char " << TargetName << "InstrNameData[] = {\n";
> - InstrNames.emit(OS, printChar);
> + InstrNames.emit_string_literals(OS);
> OS << "};\n\n";
>
> OS << "extern const unsigned " << TargetName <<"InstrNameIndices[] = {";
>
> diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
> b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
> index 2586ec671b2a..eff9a7e13181 100644
> --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
> +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
> @@ -993,7 +993,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS,
> CodeGenTarget &Target,
> // Emit the string table.
> RegStrings.layout();
> OS << "extern const char " << TargetName << "RegStrings[] = {\n";
> - RegStrings.emit(OS, printChar);
> + RegStrings.emit_string_literals(OS);
> OS << "};\n\n";
>
> OS << "extern const MCRegisterDesc " << TargetName
> @@ -1066,7 +1066,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
>
> RegClassStrings.layout();
> OS << "extern const char " << TargetName << "RegClassStrings[] = {\n";
> - RegClassStrings.emit(OS, printChar);
> + RegClassStrings.emit_string_literals(OS);
> OS << "};\n\n";
>
> OS << "extern const MCRegisterClass " << TargetName
>
> diff --git a/llvm/utils/TableGen/SequenceToOffsetTable.h
> b/llvm/utils/TableGen/SequenceToOffsetTable.h
> index 327da39f4774..d8db38e1a95e 100644
> --- a/llvm/utils/TableGen/SequenceToOffsetTable.h
> +++ b/llvm/utils/TableGen/SequenceToOffsetTable.h
> @@ -15,6 +15,7 @@
> #ifndef LLVM_UTILS_TABLEGEN_SEQUENCETOOFFSETTABLE_H
> #define LLVM_UTILS_TABLEGEN_SEQUENCETOOFFSETTABLE_H
>
> +#include "llvm/Support/CommandLine.h"
> #include "llvm/Support/raw_ostream.h"
> #include <algorithm>
> #include <cassert>
> @@ -23,6 +24,61 @@
> #include <map>
>
> namespace llvm {
> +extern llvm::cl::opt<bool> EmitLongStrLiterals;
> +
> +// Helper function for SequenceToOffsetTable<string>.
> +static inline void printStrLitEscChar(raw_ostream &OS, char C) {
> + const char *Escapes[] = {
> + "\\000", "\\001", "\\002", "\\003", "\\004", "\\005", "\\006",
> "\\007",
> + "\\010", "\\t", "\\n", "\\013", "\\014", "\\r", "\\016",
> "\\017",
> + "\\020", "\\021", "\\022", "\\023", "\\024", "\\025", "\\026",
> "\\027",
> + "\\030", "\\031", "\\032", "\\033", "\\034", "\\035", "\\036",
> "\\037",
> + " ", "!", "\\\"", "#", "$", "%", "&", "'",
> + "(", ")", "*", "+", ",", "-", ".", "/",
> + "0", "1", "2", "3", "4", "5", "6", "7",
> + "8", "9", ":", ";", "<", "=", ">", "?",
> + "@", "A", "B", "C", "D", "E", "F", "G",
> + "H", "I", "J", "K", "L", "M", "N", "O",
> + "P", "Q", "R", "S", "T", "U", "V", "W",
> + "X", "Y", "Z", "[", "\\\\", "]", "^", "_",
> + "`", "a", "b", "c", "d", "e", "f", "g",
> + "h", "i", "j", "k", "l", "m", "n", "o",
> + "p", "q", "r", "s", "t", "u", "v", "w",
> + "x", "y", "z", "{", "|", "}", "~",
> "\\177",
> + "\\200", "\\201", "\\202", "\\203", "\\204", "\\205", "\\206",
> "\\207",
> + "\\210", "\\211", "\\212", "\\213", "\\214", "\\215", "\\216",
> "\\217",
> + "\\220", "\\221", "\\222", "\\223", "\\224", "\\225", "\\226",
> "\\227",
> + "\\230", "\\231", "\\232", "\\233", "\\234", "\\235", "\\236",
> "\\237",
> + "\\240", "\\241", "\\242", "\\243", "\\244", "\\245", "\\246",
> "\\247",
> + "\\250", "\\251", "\\252", "\\253", "\\254", "\\255", "\\256",
> "\\257",
> + "\\260", "\\261", "\\262", "\\263", "\\264", "\\265", "\\266",
> "\\267",
> + "\\270", "\\271", "\\272", "\\273", "\\274", "\\275", "\\276",
> "\\277",
> + "\\300", "\\301", "\\302", "\\303", "\\304", "\\305", "\\306",
> "\\307",
> + "\\310", "\\311", "\\312", "\\313", "\\314", "\\315", "\\316",
> "\\317",
> + "\\320", "\\321", "\\322", "\\323", "\\324", "\\325", "\\326",
> "\\327",
> + "\\330", "\\331", "\\332", "\\333", "\\334", "\\335", "\\336",
> "\\337",
> + "\\340", "\\341", "\\342", "\\343", "\\344", "\\345", "\\346",
> "\\347",
> + "\\350", "\\351", "\\352", "\\353", "\\354", "\\355", "\\356",
> "\\357",
> + "\\360", "\\361", "\\362", "\\363", "\\364", "\\365", "\\366",
> "\\367",
> + "\\370", "\\371", "\\372", "\\373", "\\374", "\\375", "\\376",
> +"\\377"};
> +
> + static_assert(sizeof Escapes / sizeof Escapes[0] ==
> + std::numeric_limits<unsigned char>::max() + 1,
> + "unsupported character type");
> + OS << Escapes[static_cast<unsigned char>(C)]; }
> +
> +static inline void printChar(raw_ostream &OS, char C) {
> + unsigned char UC(C);
> + if (isalnum(UC) || ispunct(UC)) {
> + OS << '\'';
> + if (C == '\\' || C == '\'')
> + OS << '\\';
> + OS << C << '\'';
> + } else {
> + OS << unsigned(UC);
> + }
> +}
>
> /// SequenceToOffsetTable - Collect a number of terminated sequences of T.
> /// Compute the layout of a table that contains all the sequences, possibly by
> @@ -108,6 +164,24 @@ class SequenceToOffsetTable {
> return I->second + (I->first.size() - Seq.size());
> }
>
> + /// `emit_string_literals` - Print out the table as the body of an array
> + /// initializer, where each element is a C string literal terminated by
> + /// `Term`. Calls the given `Print` function to format individual
> + /// elements.
> + void emit_string_literals(raw_ostream &OS) const {
> + assert(Entries && "Call layout() before emit_string_literals()");
> + if (!EmitLongStrLiterals) {
> + return emit(OS, printChar, "0");
> + }
> + for (auto I : Seqs) {
> + OS << " /* " << I.second << " */ \"";
> + for (auto C: I.first) {
> + printStrLitEscChar(OS, C);
> + }
> + OS << "\\0\"\n";
> + }
> + }
> +
> /// emit - Print out the table as the body of an array initializer.
> /// Use the Print function to print elements.
> void emit(raw_ostream &OS,
> @@ -127,19 +201,6 @@ class SequenceToOffsetTable {
> }
> };
>
> -// Helper function for SequenceToOffsetTable<string>.
> -static inline void printChar(raw_ostream &OS, char C) {
> - unsigned char UC(C);
> - if (isalnum(UC) || ispunct(UC)) {
> - OS << '\'';
> - if (C == '\\' || C == '\'')
> - OS << '\\';
> - OS << C << '\'';
> - } else {
> - OS << unsigned(UC);
> - }
> -}
> -
> } // end namespace llvm
>
> #endif
>
> diff --git a/llvm/utils/TableGen/TableGen.cpp
> b/llvm/utils/TableGen/TableGen.cpp
> index bdb963c15d32..6da6599eb54e 100644
> --- a/llvm/utils/TableGen/TableGen.cpp
> +++ b/llvm/utils/TableGen/TableGen.cpp
> @@ -60,6 +60,12 @@ namespace llvm {
> /// Storage for TimeRegionsOpt as a global so that backends aren't required to
> /// include CommandLine.h
> bool TimeRegions = false;
> +cl::opt<bool> EmitLongStrLiterals(
> + "long-string-literals",
> + cl::desc("when emitting large string tables, prefer string literals over "
> + "comma-separated char literals. This can be a readability and "
> + "compile-time performance win, but upsets some compilers"),
> + cl::Hidden, cl::init(true));
> } // end namespace llvm
>
> namespace {
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list