[clang] Make -dump-tokens option align tokens (PR #164894)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Mar 15 10:47:58 PDT 2026
https://github.com/alexpaniman updated https://github.com/llvm/llvm-project/pull/164894
>From 805d4a513d3927620058d9248d451bd4948e709d Mon Sep 17 00:00:00 2001
From: alexpaniman <alexpaniman at gmail.com>
Date: Thu, 23 Oct 2025 23:25:45 +0300
Subject: [PATCH 1/4] [clang] Make -dump-tokens option align tokens
---
clang/lib/Lex/Preprocessor.cpp | 19 +++++++++++--------
clang/test/Preprocessor/dump-tokens.cpp | 16 ++++++++++++++++
2 files changed, 27 insertions(+), 8 deletions(-)
create mode 100644 clang/test/Preprocessor/dump-tokens.cpp
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index e003ad3a95570..fcf2369453d47 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -59,6 +59,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -234,14 +235,20 @@ void Preprocessor::FinalizeForModelFile() {
}
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
- llvm::errs() << tok::getTokenName(Tok.getKind());
+ llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
- if (!Tok.isAnnotation())
- llvm::errs() << " '" << getSpelling(Tok) << "'";
+ std::string Spelling;
+ if (!Tok.isAnnotation()) {
+ Spelling = llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'");
+ }
+ llvm::errs() << Spelling;
if (!DumpFlags) return;
- llvm::errs() << "\t";
+ llvm::errs() << "Loc=<";
+ DumpLocation(Tok.getLocation());
+ llvm::errs() << ">";
+
if (Tok.isAtStartOfLine())
llvm::errs() << " [StartOfLine]";
if (Tok.hasLeadingSpace())
@@ -253,10 +260,6 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
<< "']";
}
-
- llvm::errs() << "\tLoc=<";
- DumpLocation(Tok.getLocation());
- llvm::errs() << ">";
}
void Preprocessor::DumpLocation(SourceLocation Loc) const {
diff --git a/clang/test/Preprocessor/dump-tokens.cpp b/clang/test/Preprocessor/dump-tokens.cpp
new file mode 100644
index 0000000000000..3774894943b87
--- /dev/null
+++ b/clang/test/Preprocessor/dump-tokens.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -dump-tokens %s 2>&1 | FileCheck %s
+
+-> // CHECK: arrow '->'
+5 // CHECK: numeric_constant '5'
+id // CHECK: identifier 'id'
+& // CHECK: amp '&'
+) // CHECK: r_paren ')'
+unsigned // CHECK: unsigned 'unsigned'
+~ // CHECK: tilde '~'
+long_variable_name_very_long // CHECK: identifier 'long_variable_name_very_long'
+union // CHECK: union 'union'
+42 // CHECK: numeric_constant '42'
+j // CHECK: identifier 'j'
+&= // CHECK: ampequal '&='
+15 // CHECK: numeric_constant '15'
+
>From 194f5ad66f6e30e2628d91f323480da5ce4c83a2 Mon Sep 17 00:00:00 2001
From: alexpaniman <alexpaniman at gmail.com>
Date: Fri, 24 Oct 2025 19:52:33 +0300
Subject: [PATCH 2/4] [clang] Remove unnecessary variable from
Preprocessor::DumpToken
---
clang/lib/Lex/Preprocessor.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index fcf2369453d47..76e735eec7f13 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -237,11 +237,9 @@ void Preprocessor::FinalizeForModelFile() {
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
- std::string Spelling;
if (!Tok.isAnnotation()) {
- Spelling = llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'");
+ llvm::errs() << llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'");
}
- llvm::errs() << Spelling;
if (!DumpFlags) return;
>From ba0ba385ca90a4531f810db9feb85fc31b0d8ba1 Mon Sep 17 00:00:00 2001
From: alexpaniman <alexpaniman at gmail.com>
Date: Fri, 24 Oct 2025 20:10:44 +0300
Subject: [PATCH 3/4] [clang] Ensure consistent spacing for annotations too in
Preprocessor::DumpToken
---
clang/lib/Lex/Preprocessor.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 76e735eec7f13..1d1b7c2358a03 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -237,10 +237,13 @@ void Preprocessor::FinalizeForModelFile() {
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
+ std::string Spelling;
if (!Tok.isAnnotation()) {
- llvm::errs() << llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'");
+ Spelling = "'" + getSpelling(Tok) + "'";
}
+ llvm::errs() << llvm::formatv("{0,-32} ", Spelling);
+
if (!DumpFlags) return;
llvm::errs() << "Loc=<";
>From d0ca1e6ee2667da24dfe216e601d1b63da6e54b1 Mon Sep 17 00:00:00 2001
From: alexpaniman <alexpaniman at gmail.com>
Date: Sun, 15 Mar 2026 20:47:10 +0300
Subject: [PATCH 4/4] [clang] Escape multiline tokens and align trailing
markers in DumpToken
---
clang/lib/Lex/Preprocessor.cpp | 51 +++++++++++++++++++++----
clang/test/Preprocessor/dump-tokens.cpp | 46 +++++++++++++++-------
2 files changed, 76 insertions(+), 21 deletions(-)
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 1d1b7c2358a03..a1101b5943544 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -235,21 +235,59 @@ void Preprocessor::FinalizeForModelFile() {
}
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
- llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
+ std::string TokenStr;
+ llvm::raw_string_ostream OS(TokenStr);
- std::string Spelling;
+ // The alignment of 16 is chosen to comfortably fit most token kind names.
+ OS << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
+
+ // Annotation tokens are just markers that don't have a spelling -- they
+ // indicate where something expanded.
if (!Tok.isAnnotation()) {
- Spelling = "'" + getSpelling(Tok) + "'";
+ OS << "'";
+ // Escape string to prevent token spelling from spanning multiple lines.
+ OS.write_escaped(getSpelling(Tok));
+ OS << "'";
}
- llvm::errs() << llvm::formatv("{0,-32} ", Spelling);
+ // The alignment of 48 (32 characters for the spelling + the 16 for
+ // the token kind name) fits most variable names, keywords and annotations.
+ llvm::errs() << llvm::formatv("{0,-48} ", OS.str());
if (!DumpFlags) return;
+ auto Loc = Tok.getLocation();
llvm::errs() << "Loc=<";
- DumpLocation(Tok.getLocation());
+ DumpLocation(Loc);
llvm::errs() << ">";
+ // If the token points directly to a file location (i.e. not a macro
+ // expansion), then add additional padding so that trailing markers
+ // align, provided the line/column numbers are reasonably sized.
+ //
+ // Otherwise, if it's a macro expansion, don't bother with alignment,
+ // as the line will include multiple locations and be very long.
+ //
+ // NOTE: To keep this stateless, it doesn't account for filename length,
+ // so when a new header starts, the markers will be temporarily misaligned.
+ if (Loc.isFileID()) {
+ PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
+
+ if (!PLoc.isInvalid()) {
+ int LineWidth = llvm::utostr(PLoc.getLine()).size();
+ int ColumnWidth = llvm::utostr(PLoc.getColumn()).size();
+
+ // Reserve space for lines up to 9999 and columns up to 99,
+ // which is 4 + 2 = 6 characters in total.
+ const int ReservedSpace = 6;
+
+ int LeftSpace = ReservedSpace - LineWidth - ColumnWidth;
+ int Padding = std::max<int>(0, LeftSpace);
+
+ llvm::errs().indent(Padding);
+ }
+ }
+
if (Tok.isAtStartOfLine())
llvm::errs() << " [StartOfLine]";
if (Tok.hasLeadingSpace())
@@ -258,8 +296,7 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
llvm::errs() << " [ExpandDisabled]";
if (Tok.needsCleaning()) {
const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
- llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
- << "']";
+ llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) << "']";
}
}
diff --git a/clang/test/Preprocessor/dump-tokens.cpp b/clang/test/Preprocessor/dump-tokens.cpp
index 3774894943b87..0a9d459688922 100644
--- a/clang/test/Preprocessor/dump-tokens.cpp
+++ b/clang/test/Preprocessor/dump-tokens.cpp
@@ -1,16 +1,34 @@
-// RUN: %clang_cc1 -dump-tokens %s 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -dump-tokens %s 2>&1 | FileCheck %s --strict-whitespace
--> // CHECK: arrow '->'
-5 // CHECK: numeric_constant '5'
-id // CHECK: identifier 'id'
-& // CHECK: amp '&'
-) // CHECK: r_paren ')'
-unsigned // CHECK: unsigned 'unsigned'
-~ // CHECK: tilde '~'
-long_variable_name_very_long // CHECK: identifier 'long_variable_name_very_long'
-union // CHECK: union 'union'
-42 // CHECK: numeric_constant '42'
-j // CHECK: identifier 'j'
-&= // CHECK: ampequal '&='
-15 // CHECK: numeric_constant '15'
+// Different kinds of identifiers with different spelling lengths
+-> // CHECK: arrow '->' Loc=<{{.*}}:4:1> [StartOfLine]
+5 // CHECK-NEXT: numeric_constant '5' Loc=<{{.*}}:5:1> [StartOfLine]
+id // CHECK-NEXT: identifier 'id' Loc=<{{.*}}:6:1> [StartOfLine]
+& // CHECK-NEXT: amp '&' Loc=<{{.*}}:7:1> [StartOfLine]
+) // CHECK-NEXT: r_paren ')' Loc=<{{.*}}:8:1> [StartOfLine]
+unsigned // CHECK-NEXT: unsigned 'unsigned' Loc=<{{.*}}:9:1> [StartOfLine]
+~ // CHECK-NEXT: tilde '~' Loc=<{{.*}}:10:1> [StartOfLine]
+long_variable_name_very_long // CHECK-NEXT: identifier 'long_variable_name_very_long' Loc=<{{.*}}:11:1> [StartOfLine]
+union // CHECK-NEXT: union 'union' Loc=<{{.*}}:12:1> [StartOfLine]
+42 // CHECK-NEXT: numeric_constant '42' Loc=<{{.*}}:13:1> [StartOfLine]
+j // CHECK-NEXT: identifier 'j' Loc=<{{.*}}:14:1> [StartOfLine]
+&= // CHECK-NEXT: ampequal '&=' Loc=<{{.*}}:15:1> [StartOfLine]
+15 // CHECK-NEXT: numeric_constant '15' Loc=<{{.*}}:16:1> [StartOfLine]
+
+// Different locations in line and trailing markers
+ at different locations= in line // CHECK-NEXT: identifier 'at' Loc=<{{.*}}:19:2> [StartOfLine] [LeadingSpace]
+ // CHECK-NEXT: identifier 'different' Loc=<{{.*}}:19:5> [LeadingSpace]
+ // CHECK-NEXT: identifier 'locations' Loc=<{{.*}}:19:15> [LeadingSpace]
+ // CHECK-NEXT: equal '=' Loc=<{{.*}}:19:24>
+ // CHECK-NEXT: identifier 'in' Loc=<{{.*}}:19:26> [LeadingSpace]
+ // CHECK-NEXT: identifier 'line' Loc=<{{.*}}:19:29> [LeadingSpace]
+
+// Tokens that require escaping & annotations
+#pragma clang __debug parser_crash // CHECK-NEXT: annot_pragma_parser_crash Loc=<{{.*}}:27:23>
+ // CHECK-NEXT: eod '\n' Loc=<{{.*}}:27:119> [LeadingSpace]
+#pragma clang __debug captured // CHECK-NEXT: annot_pragma_captured Loc=<{{.*}}:29:120>
+#pragma clang __debug dump X // CHECK-NEXT: annot_pragma_dump Loc=<{{.*}}:30:23>
+ // CHECK-NEXT: identifier 'X' Loc=<{{.*}}:30:28> [LeadingSpace]
+ // CHECK-NEXT: eod '\n' Loc=<{{.*}}:30:119> [LeadingSpace]
+ // CHECK-NEXT: eof '' Loc=<{{.*}}:34:1>
More information about the cfe-commits
mailing list