[llvm] [TableGen] Implement a preprocessor (PR #120113)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 17 02:29:56 PST 2024


https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/120113

>From c1f54d97b87a5ac13ca74ee7404ef471aabb8941 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 16 Dec 2024 17:01:29 +0000
Subject: [PATCH 1/4] [TableGen] Implement a preprocessor

Implement `llvm-tblgen -E` to preprocess the input files, generating a
single output file which can be fed back into TableGen.
---
 llvm/docs/CommandGuide/tblgen.rst    |   3 +
 llvm/lib/TableGen/Main.cpp           | 107 +++++++++++++++++++++------
 llvm/test/TableGen/x86-preprocess.td |   7 ++
 3 files changed, 93 insertions(+), 24 deletions(-)
 create mode 100644 llvm/test/TableGen/x86-preprocess.td

diff --git a/llvm/docs/CommandGuide/tblgen.rst b/llvm/docs/CommandGuide/tblgen.rst
index 92186579e682df..36a17c0422d546 100644
--- a/llvm/docs/CommandGuide/tblgen.rst
+++ b/llvm/docs/CommandGuide/tblgen.rst
@@ -103,6 +103,9 @@ General Options
 
   Write the output file only if it is new or has changed.
 
+.. option:: -E
+
+  Preprocess the source files instead of running any backend.
 
 clang-tblgen Options
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 55a99cbfc58acd..86ed9ad1e9ca08 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -68,12 +68,66 @@ static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
     "no-warn-on-unused-template-args",
     cl::desc("Disable unused template argument warnings."));
 
+static cl::opt<bool> Preprocess("E", cl::desc("Write preprocessed output"));
+
 static int reportError(const char *ProgName, Twine Msg) {
   errs() << ProgName << ": " << Msg;
   errs().flush();
   return 1;
 }
 
+/// Encapsulate file, line and column numbers from SourceMgr.
+struct SMCoords {
+  unsigned Buf = 0;
+  unsigned Line = 0;
+  unsigned Col = 0;
+  SMCoords() = default;
+  SMCoords(const SourceMgr &Mgr, SMLoc Loc) {
+    Buf = Mgr.FindBufferContainingLoc(Loc);
+    // TODO: SourceMgr::getLineAndColumn is not a fast method. Find a better way
+    // to do this. For example we don't need the column number for every token,
+    // only the first token on each output line.
+    std::tie(Line, Col) = Mgr.getLineAndColumn(Loc, Buf);
+  }
+};
+
+/// Create preprocessed output for `-E` option.
+static int preprocessInput(raw_ostream &OS) {
+  TGLexer Lex(SrcMgr, {});
+  SMCoords Last;
+  bool Any = false;
+  while (true) {
+    Lex.Lex();
+    if (Lex.getCode() == tgtok::Eof || Lex.getCode() == tgtok::Error)
+      break;
+    SMCoords This(SrcMgr, Lex.getLoc());
+    if (This.Buf == Last.Buf && This.Line == Last.Line) {
+      // Add a single space between tokens on the same line. This is overkill in
+      // many cases but at least it will parse correctly.
+      OS << ' ';
+    } else if (Last.Buf) {
+      // Always start a new line when including a new file or popping back out
+      // to the previous file. This is just a heuristic to make the output look
+      // reasonably pretty.
+      OS << '\n';
+      // Indent the first token on a line to its original indentation, to make
+      // the output look pretty.
+      for (unsigned I = 1; I < This.Col; ++I)
+        OS << ' ';
+    }
+
+    const char *Start = Lex.getLoc().getPointer();
+    const char *End = Lex.getLocRange().End.getPointer();
+    OS << StringRef(Start, End - Start);
+    Any = true;
+
+    Last = This;
+  }
+  if (Any)
+    OS << '\n';
+  return Lex.getCode() == tgtok::Error;
+}
+
 /// Create a dependency file for `-d` option.
 ///
 /// This functionality is really only for the benefit of the build system.
@@ -122,32 +176,37 @@ int llvm::TableGenMain(const char *argv0,
   // it later.
   SrcMgr.setIncludeDirs(IncludeDirs);
 
-  TGParser Parser(SrcMgr, MacroNames, Records, NoWarnOnUnusedTemplateArgs);
-
-  if (Parser.ParseFile())
-    return 1;
-  Timer.stopTimer();
-
-  // Write output to memory.
-  Timer.startBackendTimer("Backend overall");
   std::string OutString;
   raw_string_ostream Out(OutString);
-  unsigned status = 0;
-  // ApplyCallback will return true if it did not apply any callback. In that
-  // case, attempt to apply the MainFn.
-  if (TableGen::Emitter::ApplyCallback(Records, Out))
-    status = MainFn ? MainFn(Out, Records) : 1;
-  Timer.stopBackendTimer();
-  if (status)
-    return 1;
-
-  // Always write the depfile, even if the main output hasn't changed.
-  // If it's missing, Ninja considers the output dirty.  If this was below
-  // the early exit below and someone deleted the .inc.d file but not the .inc
-  // file, tablegen would never write the depfile.
-  if (!DependFilename.empty()) {
-    if (int Ret = createDependencyFile(Parser, argv0))
-      return Ret;
+  if (Preprocess) {
+    if (preprocessInput(Out))
+      return 1;
+  } else {
+    TGParser Parser(SrcMgr, MacroNames, Records, NoWarnOnUnusedTemplateArgs);
+
+    if (Parser.ParseFile())
+      return 1;
+    Timer.stopTimer();
+
+    // Write output to memory.
+    Timer.startBackendTimer("Backend overall");
+    unsigned status = 0;
+    // ApplyCallback will return true if it did not apply any callback. In that
+    // case, attempt to apply the MainFn.
+    if (TableGen::Emitter::ApplyCallback(Records, Out))
+      status = MainFn ? MainFn(Out, Records) : 1;
+    Timer.stopBackendTimer();
+    if (status)
+      return 1;
+
+    // Always write the depfile, even if the main output hasn't changed.
+    // If it's missing, Ninja considers the output dirty.  If this was below
+    // the early exit below and someone deleted the .inc.d file but not the .inc
+    // file, tablegen would never write the depfile.
+    if (!DependFilename.empty()) {
+      if (int Ret = createDependencyFile(Parser, argv0))
+        return Ret;
+    }
   }
 
   Timer.startTimer("Write output");
diff --git a/llvm/test/TableGen/x86-preprocess.td b/llvm/test/TableGen/x86-preprocess.td
new file mode 100644
index 00000000000000..a156d1efc9fe65
--- /dev/null
+++ b/llvm/test/TableGen/x86-preprocess.td
@@ -0,0 +1,7 @@
+// Check that parsing the preprocessor output creates exactly the same records
+// as parsing the original files directly.
+
+// RUN: llvm-tblgen %p/../../lib/Target/X86/X86.td -I %p/../../lib/Target/X86 -I %p/../../include -o %t -E
+// RUN: llvm-tblgen %p/../../lib/Target/X86/X86.td -I %p/../../lib/Target/X86 -I %p/../../include -o %t1
+// RUN: llvm-tblgen %t -o %t2
+// RUN: cmp %t1 %t2

>From 128005489b5271c0e8cb5901503d915870864e0a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 17 Dec 2024 08:42:43 +0000
Subject: [PATCH 2/4] Fix macro handling

---
 llvm/lib/TableGen/Main.cpp       |  2 +-
 llvm/test/TableGen/preprocess.td | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/TableGen/preprocess.td

diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 86ed9ad1e9ca08..4046492481e032 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -93,7 +93,7 @@ struct SMCoords {
 
 /// Create preprocessed output for `-E` option.
 static int preprocessInput(raw_ostream &OS) {
-  TGLexer Lex(SrcMgr, {});
+  TGLexer Lex(SrcMgr, MacroNames);
   SMCoords Last;
   bool Any = false;
   while (true) {
diff --git a/llvm/test/TableGen/preprocess.td b/llvm/test/TableGen/preprocess.td
new file mode 100644
index 00000000000000..b819b20e016e54
--- /dev/null
+++ b/llvm/test/TableGen/preprocess.td
@@ -0,0 +1,13 @@
+// Check that preprocessing respects macro definitions.
+
+// RUN: llvm-tblgen %s -E -DFOO | FileCheck %s -check-prefix=CHECK-FOO
+// RUN: llvm-tblgen %s -E | FileCheck %s
+
+// CHECK-FOO: yes
+// CHECK: no
+
+#ifdef FOO
+yes
+#else
+no
+#endif

>From 4ce27e415db6fe5a416366f40d3702c1926f0d63 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 17 Dec 2024 10:19:02 +0000
Subject: [PATCH 3/4] Switch from X86.td to ARM.td for much faster testing

---
 llvm/test/TableGen/arm-preprocess.td | 7 +++++++
 llvm/test/TableGen/x86-preprocess.td | 7 -------
 2 files changed, 7 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/TableGen/arm-preprocess.td
 delete mode 100644 llvm/test/TableGen/x86-preprocess.td

diff --git a/llvm/test/TableGen/arm-preprocess.td b/llvm/test/TableGen/arm-preprocess.td
new file mode 100644
index 00000000000000..d678dc7a6e8526
--- /dev/null
+++ b/llvm/test/TableGen/arm-preprocess.td
@@ -0,0 +1,7 @@
+// Check that parsing the preprocessor output creates exactly the same records
+// as parsing the original files directly.
+
+// RUN: llvm-tblgen %p/../../lib/Target/ARM/ARM.td -I %p/../../lib/Target/ARM -I %p/../../include -o %t -E
+// RUN: llvm-tblgen %p/../../lib/Target/ARM/ARM.td -I %p/../../lib/Target/ARM -I %p/../../include -o %t1
+// RUN: llvm-tblgen %t -o %t2
+// RUN: cmp %t1 %t2
diff --git a/llvm/test/TableGen/x86-preprocess.td b/llvm/test/TableGen/x86-preprocess.td
deleted file mode 100644
index a156d1efc9fe65..00000000000000
--- a/llvm/test/TableGen/x86-preprocess.td
+++ /dev/null
@@ -1,7 +0,0 @@
-// Check that parsing the preprocessor output creates exactly the same records
-// as parsing the original files directly.
-
-// RUN: llvm-tblgen %p/../../lib/Target/X86/X86.td -I %p/../../lib/Target/X86 -I %p/../../include -o %t -E
-// RUN: llvm-tblgen %p/../../lib/Target/X86/X86.td -I %p/../../lib/Target/X86 -I %p/../../include -o %t1
-// RUN: llvm-tblgen %t -o %t2
-// RUN: cmp %t1 %t2

>From 7a496029dd229eaa36e81ef2053761725ee0d3c9 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 17 Dec 2024 10:25:46 +0000
Subject: [PATCH 4/4] Use indent

---
 llvm/lib/TableGen/Main.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 4046492481e032..0188c4406b9370 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -112,8 +112,7 @@ static int preprocessInput(raw_ostream &OS) {
       OS << '\n';
       // Indent the first token on a line to its original indentation, to make
       // the output look pretty.
-      for (unsigned I = 1; I < This.Col; ++I)
-        OS << ' ';
+      OS.indent(This.Col - 1);
     }
 
     const char *Start = Lex.getLoc().getPointer();



More information about the llvm-commits mailing list