[clang] #160667 Preserve source location for macro arguments. (PR #162319)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 7 09:40:33 PDT 2025
https://github.com/SergejSalnikov updated https://github.com/llvm/llvm-project/pull/162319
>From b475c8612e3380983c6ced33887ca4efe0211f90 Mon Sep 17 00:00:00 2001
From: skill <skill at google.com>
Date: Tue, 7 Oct 2025 18:26:56 +0200
Subject: [PATCH] #160667 Preserve source location for macro arguments.
---
clang/include/clang/Lex/TokenLexer.h | 8 -
.../lib/Frontend/PrintPreprocessedOutput.cpp | 2 +-
clang/lib/Lex/MacroArgs.cpp | 12 +-
clang/lib/Lex/TokenLexer.cpp | 198 ++++++------------
4 files changed, 79 insertions(+), 141 deletions(-)
diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h
index 0456dd961fc30..29eee0f671791 100644
--- a/clang/include/clang/Lex/TokenLexer.h
+++ b/clang/include/clang/Lex/TokenLexer.h
@@ -222,14 +222,6 @@ class TokenLexer {
/// macro expansion source location entry.
SourceLocation getExpansionLocForMacroDefLoc(SourceLocation loc) const;
- /// Creates SLocEntries and updates the locations of macro argument
- /// tokens to their new expanded locations.
- ///
- /// \param ArgIdSpellLoc the location of the macro argument id inside the
- /// macro definition.
- void updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
- Token *begin_tokens, Token *end_tokens);
-
/// Remove comma ahead of __VA_ARGS__, if present, according to compiler
/// dialect settings. Returns true if the comma is removed.
bool MaybeRemoveCommaBeforeVaArgs(SmallVectorImpl<Token> &ResultToks,
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 9e046633328d7..32e2b8cdcf4c6 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -306,7 +306,7 @@ bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
*OS << '\n';
StartedNewLine = true;
} else if (!DisableLineMarkers) {
- if (LineNo - CurLine <= 8) {
+ if (LineNo >= CurLine && LineNo - CurLine <= 8) {
const char *NewLines = "\n\n\n\n\n\n\n\n";
OS->write(NewLines, LineNo - CurLine);
} else {
diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp
index 548df16c59f6b..43e1ba9146c24 100644
--- a/clang/lib/Lex/MacroArgs.cpp
+++ b/clang/lib/Lex/MacroArgs.cpp
@@ -65,8 +65,16 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI,
"assume trivial copyability if copying into the "
"uninitialized array (as opposed to reusing a cached "
"MacroArgs)");
- std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(),
- Result->getTrailingObjects());
+ Token *Tokens = Result->getTrailingObjects();
+ std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(), Tokens);
+
+ // Clear StartOfLine flag on all argument tokens. These tokens are being
+ // used in a macro expansion context where their original line position
+ // is not relevant. Keeping this flag causes incorrect spacing during
+ // stringification (e.g., "a/ b" instead of "a/b").
+ for (unsigned i = 0; i < UnexpArgTokens.size(); ++i) {
+ Tokens[i].clearFlag(Token::StartOfLine);
+ }
}
return Result;
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index 47f4134fb1465..61fb1406a7185 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -477,12 +477,6 @@ void TokenLexer::ExpandFunctionArguments() {
if (Tok.is(tok::hashhash))
Tok.setKind(tok::unknown);
- if(ExpandLocStart.isValid()) {
- updateLocForMacroArgTokens(CurTok.getLocation(),
- ResultToks.begin()+FirstResult,
- ResultToks.end());
- }
-
// If any tokens were substituted from the argument, the whitespace
// before the first token should match the whitespace of the arg
// identifier.
@@ -534,11 +528,6 @@ void TokenLexer::ExpandFunctionArguments() {
Tok.setKind(tok::unknown);
}
- if (ExpandLocStart.isValid()) {
- updateLocForMacroArgTokens(CurTok.getLocation(),
- ResultToks.end()-NumToks, ResultToks.end());
- }
-
// Transfer the leading whitespace information from the token
// (the macro argument) onto the first token of the
// expansion. Note that we don't do this for the GNU
@@ -600,6 +589,23 @@ void TokenLexer::ExpandFunctionArguments() {
assert(!OwnsTokens && "This would leak if we already own the token list");
// This is deleted in the dtor.
NumTokens = ResultToks.size();
+
+ // Set StartOfLine flags for all tokens based on their line numbers. This
+ // ensures HandleWhitespaceBeforeTok calls MoveToLine for tokens on
+ // different lines.
+ SourceManager &SM = PP.getSourceManager();
+ unsigned PrevLine = 0;
+ for (size_t i = 0; i < ResultToks.size(); ++i) {
+ Token &Tok = ResultToks[i];
+ SourceLocation Loc = Tok.getLocation();
+ unsigned CurLine = SM.getPresumedLoc(Loc).getLine();
+ if (i == 0 || (CurLine != PrevLine && CurLine > 0)) {
+ // First token or token on different line - mark as start of line
+ Tok.setFlag(Token::StartOfLine);
+ }
+ PrevLine = CurLine;
+ }
+
// The tokens will be added to Preprocessor's cache and will be removed
// when this TokenLexer finishes lexing them.
Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);
@@ -678,7 +684,31 @@ bool TokenLexer::Lex(Token &Tok) {
ExpandLocEnd,
Tok.getLength());
} else {
- instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
+ // Check if this token is from a macro argument.
+ // Macro argument tokens have FileID locations (not MacroID) because we
+ // skipped remapping in updateLocForMacroArgTokens. We need to check if
+ // the token is from the macro definition or from an argument.
+ if (SM.isInSLocAddrSpace(Tok.getLocation(), MacroDefStart,
+ MacroDefLength)) {
+ // Token is from macro definition - remap to expansion location
+ instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
+ } else {
+ // Token is from macro argument - wrap FileID in MacroID for diagnostic
+ // suppression while preserving line numbers. Use self-referencing
+ // createMacroArgExpansionLoc where both spelling and expansion point
+ // to the same FileID. This ensures isMacroID() returns true (for
+ // diagnostic suppression) while getDecomposedExpansionLoc() resolves
+ // to the original FileID (for line number preservation).
+ instLoc = SM.createMacroArgExpansionLoc(
+ Tok.getLocation(), Tok.getLocation(), Tok.getLength());
+
+ // Clear the StartOfLine flag for macro argument tokens. These tokens
+ // are being inserted into a macro expansion context, and their original
+ // line position from the source is not relevant. Keeping this flag
+ // would cause incorrect spacing during stringification (e.g., "a/ b"
+ // instead of "a/b").
+ Tok.clearFlag(Token::StartOfLine);
+ }
}
Tok.setLocation(instLoc);
@@ -687,12 +717,15 @@ bool TokenLexer::Lex(Token &Tok) {
// If this is the first token, set the lexical properties of the token to
// match the lexical properties of the macro identifier.
if (isFirstToken) {
- Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
+ // Preserve StartOfLine flag if already set (e.g., for macro arguments)
+ if (!Tok.isAtStartOfLine()) {
+ Tok.setFlagValue(Token::StartOfLine, AtStartOfLine);
+ }
Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
} else {
// If this is not the first token, we may still need to pass through
// leading whitespace if we've expanded a macro.
- if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
+ if (AtStartOfLine || Tok.isAtStartOfLine()) Tok.setFlag(Token::StartOfLine);
if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
}
AtStartOfLine = false;
@@ -900,16 +933,33 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
// expanded from the full ## expression. Pull this information together into
// a new SourceLocation that captures all of this.
SourceManager &SM = PP.getSourceManager();
- if (StartLoc.isFileID())
+ // Only convert FileID to expansion loc if it's from the macro definition.
+ // Macro argument tokens have FileID locations that are NOT from
+ // MacroDefStart.
+ if (StartLoc.isFileID() &&
+ SM.isInSLocAddrSpace(StartLoc, MacroDefStart, MacroDefLength))
StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
- if (EndLoc.isFileID())
+ if (EndLoc.isFileID() &&
+ SM.isInSLocAddrSpace(EndLoc, MacroDefStart, MacroDefLength))
EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
FileID MacroFID = SM.getFileID(MacroExpansionStart);
- while (SM.getFileID(StartLoc) != MacroFID)
+ // Only walk expansion ranges for MacroID locations.
+ // FileID locations (from macro arguments) don't have expansion ranges.
+ // Must check isMacroID() in the loop condition because
+ // getImmediateExpansionRange might return a FileID location, which would fail
+ // on the next iteration.
+ while (StartLoc.isMacroID() && SM.getFileID(StartLoc) != MacroFID)
StartLoc = SM.getImmediateExpansionRange(StartLoc).getBegin();
- while (SM.getFileID(EndLoc) != MacroFID)
+ while (EndLoc.isMacroID() && SM.getFileID(EndLoc) != MacroFID)
EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();
+ // If we exited the loop with FileID locations (from macro arguments),
+ // fall back to using MacroExpansionStart.
+ if (StartLoc.isFileID() && SM.getFileID(StartLoc) != MacroFID)
+ StartLoc = MacroExpansionStart;
+ if (EndLoc.isFileID() && SM.getFileID(EndLoc) != MacroFID)
+ EndLoc = MacroExpansionStart;
+
LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc,
LHSTok.getLength()));
@@ -985,118 +1035,6 @@ TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
return MacroExpansionStart.getLocWithOffset(relativeOffset);
}
-/// Finds the tokens that are consecutive (from the same FileID)
-/// creates a single SLocEntry, and assigns SourceLocations to each token that
-/// point to that SLocEntry. e.g for
-/// assert(foo == bar);
-/// There will be a single SLocEntry for the "foo == bar" chunk and locations
-/// for the 'foo', '==', 'bar' tokens will point inside that chunk.
-///
-/// \arg begin_tokens will be updated to a position past all the found
-/// consecutive tokens.
-static void updateConsecutiveMacroArgTokens(SourceManager &SM,
- SourceLocation ExpandLoc,
- Token *&begin_tokens,
- Token * end_tokens) {
- assert(begin_tokens + 1 < end_tokens);
- SourceLocation BeginLoc = begin_tokens->getLocation();
- llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens);
- llvm::MutableArrayRef<Token> Partition;
-
- auto NearLast = [&, Last = BeginLoc](SourceLocation Loc) mutable {
- // The maximum distance between two consecutive tokens in a partition.
- // This is an important trick to avoid using too much SourceLocation address
- // space!
- static constexpr SourceLocation::IntTy MaxDistance = 50;
- auto Distance = Loc.getRawEncoding() - Last.getRawEncoding();
- Last = Loc;
- return Distance <= MaxDistance;
- };
-
- // Partition the tokens by their FileID.
- // This is a hot function, and calling getFileID can be expensive, the
- // implementation is optimized by reducing the number of getFileID.
- if (BeginLoc.isFileID()) {
- // Consecutive tokens not written in macros must be from the same file.
- // (Neither #include nor eof can occur inside a macro argument.)
- Partition = All.take_while([&](const Token &T) {
- return T.getLocation().isFileID() && NearLast(T.getLocation());
- });
- } else {
- // Call getFileID once to calculate the bounds, and use the cheaper
- // sourcelocation-against-bounds comparison.
- FileID BeginFID = SM.getFileID(BeginLoc);
- SourceLocation Limit =
- SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID));
- Partition = All.take_while([&](const Token &T) {
- // NOTE: the Limit is included! The lexer recovery only ever inserts a
- // single token past the end of the FileID, specifically the ) when a
- // macro-arg containing a comma should be guarded by parentheses.
- //
- // It is safe to include the Limit here because SourceManager allocates
- // FileSize + 1 for each SLocEntry.
- //
- // See https://github.com/llvm/llvm-project/issues/60722.
- return T.getLocation() >= BeginLoc && T.getLocation() <= Limit
- && NearLast(T.getLocation());
- });
- }
- assert(!Partition.empty());
-
- // For the consecutive tokens, find the length of the SLocEntry to contain
- // all of them.
- SourceLocation::UIntTy FullLength =
- Partition.back().getEndLoc().getRawEncoding() -
- Partition.front().getLocation().getRawEncoding();
- // Create a macro expansion SLocEntry that will "contain" all of the tokens.
- SourceLocation Expansion =
- SM.createMacroArgExpansionLoc(BeginLoc, ExpandLoc, FullLength);
-
-#ifdef EXPENSIVE_CHECKS
- assert(llvm::all_of(Partition.drop_front(),
- [&SM, ID = SM.getFileID(Partition.front().getLocation())](
- const Token &T) {
- return ID == SM.getFileID(T.getLocation());
- }) &&
- "Must have the same FIleID!");
-#endif
- // Change the location of the tokens from the spelling location to the new
- // expanded location.
- for (Token& T : Partition) {
- SourceLocation::IntTy RelativeOffset =
- T.getLocation().getRawEncoding() - BeginLoc.getRawEncoding();
- T.setLocation(Expansion.getLocWithOffset(RelativeOffset));
- }
- begin_tokens = &Partition.back() + 1;
-}
-
-/// Creates SLocEntries and updates the locations of macro argument
-/// tokens to their new expanded locations.
-///
-/// \param ArgIdSpellLoc the location of the macro argument id inside the macro
-/// definition.
-void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
- Token *begin_tokens,
- Token *end_tokens) {
- SourceManager &SM = PP.getSourceManager();
-
- SourceLocation ExpandLoc =
- getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
-
- while (begin_tokens < end_tokens) {
- // If there's only one token just create a SLocEntry for it.
- if (end_tokens - begin_tokens == 1) {
- Token &Tok = *begin_tokens;
- Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
- ExpandLoc,
- Tok.getLength()));
- return;
- }
-
- updateConsecutiveMacroArgTokens(SM, ExpandLoc, begin_tokens, end_tokens);
- }
-}
-
void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
AtStartOfLine = Result.isAtStartOfLine();
HasLeadingSpace = Result.hasLeadingSpace();
More information about the cfe-commits
mailing list