[llvm] [TableGen] Implement a preprocessor (PR #120113)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 19 08:44:46 PST 2024


================
@@ -68,12 +68,65 @@ static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
     "no-warn-on-unused-template-args",
     cl::desc("Disable unused template argument warnings."));
 
+static cl::opt<bool> Preprocess("E", cl::desc("Write preprocessed output"));
+
 /// Write "<ProgName>: <Msg>" to errs(), flush the stream, and return 1.
 /// No trailing newline is added here.
 static int reportError(const char *ProgName, Twine Msg) {
   errs() << ProgName << ": " << Msg;
   errs().flush();
   return 1;
 }
 
+/// Encapsulate file, line and column numbers from SourceMgr.
+struct SMCoords {
+  unsigned Buf = 0; // Value returned by FindBufferContainingLoc(); 0 by default.
+  unsigned Line = 0; // First element of the getLineAndColumn() result.
+  unsigned Col = 0; // Second element of the getLineAndColumn() result.
+  SMCoords() = default; // Leaves Buf, Line and Col at their 0 initializers.
+  SMCoords(const SourceMgr &Mgr, SMLoc Loc) { // Resolve Loc through Mgr.
+    Buf = Mgr.FindBufferContainingLoc(Loc);
+    // TODO: SourceMgr::getLineAndColumn is not a fast method. Find a better way
+    // to do this. For example we don't need the column number for every token,
+    // only the first token on each output line.
+    std::tie(Line, Col) = Mgr.getLineAndColumn(Loc, Buf); // Buf passed back in.
+  }
+};
+
+/// Create preprocessed output for `-E` option.
+static int preprocessInput(raw_ostream &OS) {
+  TGLexer Lex(SrcMgr, MacroNames);
+  SMCoords Last;
+  bool Any = false;
+  while (true) {
+    Lex.Lex();
+    if (Lex.getCode() == tgtok::Eof || Lex.getCode() == tgtok::Error)
+      break;
+    SMCoords This(SrcMgr, Lex.getLoc());
+    if (This.Buf == Last.Buf && This.Line == Last.Line) {
+      // Add a single space between tokens on the same line. This is overkill in
+      // many cases but at least it will parse correctly.
+      OS << ' ';
+    } else if (Last.Buf) {
+      // Always start a new line when including a new file or popping back out
+      // to the previous file. This is just a heuristic to make the output look
+      // reasonably pretty.
+      OS << '\n';
+      // Indent the first token on a line to its original indentation, to make
+      // the output look pretty.
+      OS.indent(This.Col - 1);
+    }
+
+    const char *Start = Lex.getLoc().getPointer();
+    const char *End = Lex.getLocRange().End.getPointer();
+    OS << StringRef(Start, End - Start);
+    Any = true;
+
+    Last = This;
+  }
+  if (Any)
+    OS << '\n';
+  return Lex.getCode() == tgtok::Error;
----------------
jayfoad wrote:

I added some one-liner tests for some error cases.

https://github.com/llvm/llvm-project/pull/120113


More information about the llvm-commits mailing list