[clang] [clang] Make -dump-tokens option align tokens (PR #164894)

Thu Oct 23 14:01:20 PDT 2025

https://github.com/alexpaniman created https://github.com/llvm/llvm-project/pull/164894

When using `-Xclang -dump-tokens`, the lexer dump output is currently difficult to read because the data are misaligned. The existing implementation simply separates the token name, spelling, flags, and location using `'\t'`, which results in inconsistent spacing.

For example, on the test input added in this patch, the current output looks like this **(BEFORE THIS PR)**:

<img width="2936" height="632" alt="image" src="https://github.com/user-attachments/assets/ad893958-6d57-4a76-8838-7fc56e37e6a7" />

# Changes

This small PR improves the readability of the token dump by:

+ Adding padding after the token name and after the spelling (the padding amount was chosen empirically to produce good average alignment).
+ Swapping the order of location and flags (since flags can take up a lot of space and disrupt alignment).

The result is a more readable output **(AFTER THIS PR)**:

<img width="1470" height="315" alt="image" src="https://github.com/user-attachments/assets/c24f24e5-a431-42cc-b5b6-232bac5c635e" />


From 805d4a513d3927620058d9248d451bd4948e709d Mon Sep 17 00:00:00 2001
From: alexpaniman <alexpaniman at gmail.com>
Date: Thu, 23 Oct 2025 23:25:45 +0300
Subject: [PATCH] [clang] Make -dump-tokens option align tokens

---
 clang/lib/Lex/Preprocessor.cpp          | 19 +++++++++++--------
 clang/test/Preprocessor/dump-tokens.cpp | 16 ++++++++++++++++
 2 files changed, 27 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/Preprocessor/dump-tokens.cpp

diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index e003ad3a95570..fcf2369453d47 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -59,6 +59,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -234,14 +235,20 @@ void Preprocessor::FinalizeForModelFile() {
 }
 
 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
-  llvm::errs() << tok::getTokenName(Tok.getKind());
+  llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
 
-  if (!Tok.isAnnotation())
-    llvm::errs() << " '" << getSpelling(Tok) << "'";
+  std::string Spelling;
+  if (!Tok.isAnnotation()) {
+    Spelling = llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'");
+  }
+  llvm::errs() << Spelling;
 
   if (!DumpFlags) return;
 
-  llvm::errs() << "\t";
+  llvm::errs() << "Loc=<";
+  DumpLocation(Tok.getLocation());
+  llvm::errs() << ">";
+
   if (Tok.isAtStartOfLine())
     llvm::errs() << " [StartOfLine]";
   if (Tok.hasLeadingSpace())
@@ -253,10 +260,6 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
                  << "']";
   }
-
-  llvm::errs() << "\tLoc=<";
-  DumpLocation(Tok.getLocation());
-  llvm::errs() << ">";
 }
 
 void Preprocessor::DumpLocation(SourceLocation Loc) const {
diff --git a/clang/test/Preprocessor/dump-tokens.cpp b/clang/test/Preprocessor/dump-tokens.cpp
new file mode 100644
index 0000000000000..3774894943b87
--- /dev/null
+++ b/clang/test/Preprocessor/dump-tokens.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -dump-tokens %s 2>&1 | FileCheck --strict-whitespace %s
+// Strict whitespace is required: by default FileCheck collapses runs of spaces and tabs, so the column padding would not be verified.
+->                           // CHECK: arrow            '->'
+5                            // CHECK: numeric_constant '5'
+id                           // CHECK: identifier       'id'
+&                            // CHECK: amp              '&'
+)                            // CHECK: r_paren          ')'
+unsigned                     // CHECK: unsigned         'unsigned'
+~                            // CHECK: tilde            '~'
+long_variable_name_very_long // CHECK: identifier       'long_variable_name_very_long'
+union                        // CHECK: union            'union'
+42                           // CHECK: numeric_constant '42'
+j                            // CHECK: identifier       'j'
+&=                           // CHECK: ampequal         '&='
+15                           // CHECK: numeric_constant '15'
+