[llvm] [TableGen] Implement a preprocessor (PR #120113)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 17 02:29:56 PST 2024
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/120113
From c1f54d97b87a5ac13ca74ee7404ef471aabb8941 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 16 Dec 2024 17:01:29 +0000
Subject: [PATCH 1/4] [TableGen] Implement a preprocessor
Implement `llvm-tblgen -E` to preprocess the input files, generating a
single output file which can be fed back into TableGen.
---
llvm/docs/CommandGuide/tblgen.rst | 3 +
llvm/lib/TableGen/Main.cpp | 107 +++++++++++++++++++++------
llvm/test/TableGen/x86-preprocess.td | 7 ++
3 files changed, 93 insertions(+), 24 deletions(-)
create mode 100644 llvm/test/TableGen/x86-preprocess.td
diff --git a/llvm/docs/CommandGuide/tblgen.rst b/llvm/docs/CommandGuide/tblgen.rst
index 92186579e682df..36a17c0422d546 100644
--- a/llvm/docs/CommandGuide/tblgen.rst
+++ b/llvm/docs/CommandGuide/tblgen.rst
@@ -103,6 +103,9 @@ General Options
Write the output file only if it is new or has changed.
+.. option:: -E
+
+ Preprocess the source files instead of running any backend.
clang-tblgen Options
~~~~~~~~~~~~~~~~~~~~
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 55a99cbfc58acd..86ed9ad1e9ca08 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -68,12 +68,66 @@ static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
"no-warn-on-unused-template-args",
cl::desc("Disable unused template argument warnings."));
+static cl::opt<bool> Preprocess("E", cl::desc("Write preprocessed output"));
+
static int reportError(const char *ProgName, Twine Msg) {
errs() << ProgName << ": " << Msg;
errs().flush();
return 1;
}
+/// Encapsulate file, line and column numbers from SourceMgr.
+struct SMCoords {
+ unsigned Buf = 0;
+ unsigned Line = 0;
+ unsigned Col = 0;
+ SMCoords() = default;
+ SMCoords(const SourceMgr &Mgr, SMLoc Loc) {
+ Buf = Mgr.FindBufferContainingLoc(Loc);
+ // TODO: SourceMgr::getLineAndColumn is not a fast method. Find a better way
+ // to do this. For example we don't need the column number for every token,
+ // only the first token on each output line.
+ std::tie(Line, Col) = Mgr.getLineAndColumn(Loc, Buf);
+ }
+};
+
+/// Create preprocessed output for `-E` option.
+static int preprocessInput(raw_ostream &OS) {
+ TGLexer Lex(SrcMgr, {});
+ SMCoords Last;
+ bool Any = false;
+ while (true) {
+ Lex.Lex();
+ if (Lex.getCode() == tgtok::Eof || Lex.getCode() == tgtok::Error)
+ break;
+ SMCoords This(SrcMgr, Lex.getLoc());
+ if (This.Buf == Last.Buf && This.Line == Last.Line) {
+ // Add a single space between tokens on the same line. This is overkill in
+ // many cases but at least it will parse correctly.
+ OS << ' ';
+ } else if (Last.Buf) {
+ // Start a new output line whenever the source line changes, and also
+ // whenever the lexer enters an included file or pops back out of one.
+ // This is just a heuristic to make the output look reasonably pretty.
+ OS << '\n';
+ // Indent the first token on a line to its original indentation, to make
+ // the output look pretty.
+ for (unsigned I = 1; I < This.Col; ++I)
+ OS << ' ';
+ }
+
+ const char *Start = Lex.getLoc().getPointer();
+ const char *End = Lex.getLocRange().End.getPointer();
+ OS << StringRef(Start, End - Start);
+ Any = true;
+
+ Last = This;
+ }
+ if (Any)
+ OS << '\n';
+ return Lex.getCode() == tgtok::Error;
+}
+
/// Create a dependency file for `-d` option.
///
/// This functionality is really only for the benefit of the build system.
@@ -122,32 +176,37 @@ int llvm::TableGenMain(const char *argv0,
// it later.
SrcMgr.setIncludeDirs(IncludeDirs);
- TGParser Parser(SrcMgr, MacroNames, Records, NoWarnOnUnusedTemplateArgs);
-
- if (Parser.ParseFile())
- return 1;
- Timer.stopTimer();
-
- // Write output to memory.
- Timer.startBackendTimer("Backend overall");
std::string OutString;
raw_string_ostream Out(OutString);
- unsigned status = 0;
- // ApplyCallback will return true if it did not apply any callback. In that
- // case, attempt to apply the MainFn.
- if (TableGen::Emitter::ApplyCallback(Records, Out))
- status = MainFn ? MainFn(Out, Records) : 1;
- Timer.stopBackendTimer();
- if (status)
- return 1;
-
- // Always write the depfile, even if the main output hasn't changed.
- // If it's missing, Ninja considers the output dirty. If this was below
- // the early exit below and someone deleted the .inc.d file but not the .inc
- // file, tablegen would never write the depfile.
- if (!DependFilename.empty()) {
- if (int Ret = createDependencyFile(Parser, argv0))
- return Ret;
+ if (Preprocess) {
+ if (preprocessInput(Out))
+ return 1;
+ } else {
+ TGParser Parser(SrcMgr, MacroNames, Records, NoWarnOnUnusedTemplateArgs);
+
+ if (Parser.ParseFile())
+ return 1;
+ Timer.stopTimer();
+
+ // Write output to memory.
+ Timer.startBackendTimer("Backend overall");
+ unsigned status = 0;
+ // ApplyCallback will return true if it did not apply any callback. In that
+ // case, attempt to apply the MainFn.
+ if (TableGen::Emitter::ApplyCallback(Records, Out))
+ status = MainFn ? MainFn(Out, Records) : 1;
+ Timer.stopBackendTimer();
+ if (status)
+ return 1;
+
+ // Always write the depfile, even if the main output hasn't changed.
+ // If it's missing, Ninja considers the output dirty. If this was below
+ // the early exit below and someone deleted the .inc.d file but not the .inc
+ // file, tablegen would never write the depfile.
+ if (!DependFilename.empty()) {
+ if (int Ret = createDependencyFile(Parser, argv0))
+ return Ret;
+ }
}
Timer.startTimer("Write output");
diff --git a/llvm/test/TableGen/x86-preprocess.td b/llvm/test/TableGen/x86-preprocess.td
new file mode 100644
index 00000000000000..a156d1efc9fe65
--- /dev/null
+++ b/llvm/test/TableGen/x86-preprocess.td
@@ -0,0 +1,7 @@
+// Check that parsing the preprocessor output creates exactly the same records
+// as parsing the original files directly.
+
+// RUN: llvm-tblgen %p/../../lib/Target/X86/X86.td -I %p/../../lib/Target/X86 -I %p/../../include -o %t -E
+// RUN: llvm-tblgen %p/../../lib/Target/X86/X86.td -I %p/../../lib/Target/X86 -I %p/../../include -o %t1
+// RUN: llvm-tblgen %t -o %t2
+// RUN: cmp %t1 %t2
From 128005489b5271c0e8cb5901503d915870864e0a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 17 Dec 2024 08:42:43 +0000
Subject: [PATCH 2/4] Fix macro handling
---
llvm/lib/TableGen/Main.cpp | 2 +-
llvm/test/TableGen/preprocess.td | 13 +++++++++++++
2 files changed, 14 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/TableGen/preprocess.td
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 86ed9ad1e9ca08..4046492481e032 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -93,7 +93,7 @@ struct SMCoords {
/// Create preprocessed output for `-E` option.
static int preprocessInput(raw_ostream &OS) {
- TGLexer Lex(SrcMgr, {});
+ TGLexer Lex(SrcMgr, MacroNames);
SMCoords Last;
bool Any = false;
while (true) {
diff --git a/llvm/test/TableGen/preprocess.td b/llvm/test/TableGen/preprocess.td
new file mode 100644
index 00000000000000..b819b20e016e54
--- /dev/null
+++ b/llvm/test/TableGen/preprocess.td
@@ -0,0 +1,13 @@
+// Check that preprocessing respects macro definitions.
+
+// RUN: llvm-tblgen %s -E -DFOO | FileCheck %s -check-prefix=CHECK-FOO
+// RUN: llvm-tblgen %s -E | FileCheck %s
+
+// CHECK-FOO: yes
+// CHECK: no
+
+#ifdef FOO
+yes
+#else
+no
+#endif
From 4ce27e415db6fe5a416366f40d3702c1926f0d63 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 17 Dec 2024 10:19:02 +0000
Subject: [PATCH 3/4] Switch from X86.td to ARM.td for much faster testing
---
llvm/test/TableGen/arm-preprocess.td | 7 +++++++
llvm/test/TableGen/x86-preprocess.td | 7 -------
2 files changed, 7 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/TableGen/arm-preprocess.td
delete mode 100644 llvm/test/TableGen/x86-preprocess.td
diff --git a/llvm/test/TableGen/arm-preprocess.td b/llvm/test/TableGen/arm-preprocess.td
new file mode 100644
index 00000000000000..d678dc7a6e8526
--- /dev/null
+++ b/llvm/test/TableGen/arm-preprocess.td
@@ -0,0 +1,7 @@
+// Check that parsing the preprocessor output creates exactly the same records
+// as parsing the original files directly.
+
+// RUN: llvm-tblgen %p/../../lib/Target/ARM/ARM.td -I %p/../../lib/Target/ARM -I %p/../../include -o %t -E
+// RUN: llvm-tblgen %p/../../lib/Target/ARM/ARM.td -I %p/../../lib/Target/ARM -I %p/../../include -o %t1
+// RUN: llvm-tblgen %t -o %t2
+// RUN: cmp %t1 %t2
diff --git a/llvm/test/TableGen/x86-preprocess.td b/llvm/test/TableGen/x86-preprocess.td
deleted file mode 100644
index a156d1efc9fe65..00000000000000
--- a/llvm/test/TableGen/x86-preprocess.td
+++ /dev/null
@@ -1,7 +0,0 @@
-// Check that parsing the preprocessor output creates exactly the same records
-// as parsing the original files directly.
-
-// RUN: llvm-tblgen %p/../../lib/Target/X86/X86.td -I %p/../../lib/Target/X86 -I %p/../../include -o %t -E
-// RUN: llvm-tblgen %p/../../lib/Target/X86/X86.td -I %p/../../lib/Target/X86 -I %p/../../include -o %t1
-// RUN: llvm-tblgen %t -o %t2
-// RUN: cmp %t1 %t2
From 7a496029dd229eaa36e81ef2053761725ee0d3c9 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 17 Dec 2024 10:25:46 +0000
Subject: [PATCH 4/4] Use indent
---
llvm/lib/TableGen/Main.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 4046492481e032..0188c4406b9370 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -112,8 +112,7 @@ static int preprocessInput(raw_ostream &OS) {
OS << '\n';
// Indent the first token on a line to its original indentation, to make
// the output look pretty.
- for (unsigned I = 1; I < This.Col; ++I)
- OS << ' ';
+ OS.indent(This.Col - 1);
}
const char *Start = Lex.getLoc().getPointer();
More information about the llvm-commits mailing list