[clang] 67268ee - [Syntax] Fix macro-arg handling in TokenBuffer::spelledForExpanded
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 5 09:04:50 PDT 2022
Author: Sam McCall
Date: 2022-10-05T18:04:39+02:00
New Revision: 67268ee11c220b1dfdf84afb10a12371c5ae6400
URL: https://github.com/llvm/llvm-project/commit/67268ee11c220b1dfdf84afb10a12371c5ae6400
DIFF: https://github.com/llvm/llvm-project/commit/67268ee11c220b1dfdf84afb10a12371c5ae6400.diff
LOG: [Syntax] Fix macro-arg handling in TokenBuffer::spelledForExpanded
A few cases were not handled correctly. Notably:
#define ID(X) X
#define HIDE a ID(b)
HIDE
spelledForExpanded() would claim HIDE is an equivalent range of the 'b' it
contains, despite the fact that HIDE also covers 'a'.
While trying to fix this bug, I found findCommonRangeForMacroArgs hard
to understand (both the implementation and how it's used in spelledForExpanded).
It relies on details of the SourceLocation graph that are IMO fairly obscure.
So I've added/revised quite a lot of comments and made some naming tweaks.
Fixes https://github.com/clangd/clangd/issues/1289
Differential Revision: https://reviews.llvm.org/D134618
Added:
Modified:
clang/lib/Tooling/Syntax/Tokens.cpp
clang/unittests/Tooling/Syntax/TokensTest.cpp
Removed:
################################################################################
diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
index 61e02b19fa83f..fe979afe6f951 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -55,45 +55,140 @@ getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
return {Begin, End};
}
-// Finds the smallest expansion range that contains expanded tokens First and
-// Last, e.g.:
+// Finds the range within FID corresponding to expanded tokens [First, Last].
+// Prev precedes First and Next follows Last, these must *not* be included.
+// If no range satisfies the criteria, returns an invalid range.
+//
// #define ID(x) x
// ID(ID(ID(a1) a2))
// ~~ -> a1
// ~~ -> a2
// ~~~~~~~~~ -> a1 a2
-SourceRange findCommonRangeForMacroArgs(const syntax::Token &First,
- const syntax::Token &Last,
- const SourceManager &SM) {
- SourceRange Res;
- auto FirstLoc = First.location(), LastLoc = Last.location();
- // Keep traversing up the spelling chain as longs as tokens are part of the
- // same expansion.
- while (!FirstLoc.isFileID() && !LastLoc.isFileID()) {
- auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion();
- auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion();
- // Stop if expansions have diverged.
- if (ExpInfoFirst.getExpansionLocStart() !=
- ExpInfoLast.getExpansionLocStart())
+SourceRange spelledForExpandedSlow(SourceLocation First, SourceLocation Last,
+ SourceLocation Prev, SourceLocation Next,
+ FileID TargetFile,
+ const SourceManager &SM) {
+ // There are two main parts to this algorithm:
+ // - identifying which spelled range covers the expanded tokens
+ // - validating that this range doesn't cover any extra tokens (First/Last)
+ //
+ // We do these in order. However as we transform the expanded range into the
+ // spelled one, we adjust First/Last so the validation remains simple.
+
+ assert(SM.getSLocEntry(TargetFile).isFile());
+ // In most cases, to select First and Last we must return their expansion
+ // range, i.e. the whole of any macros they are included in.
+ //
+ // When First and Last are part of the *same macro arg* of a macro written
+ // in TargetFile, we that slice of the arg, i.e. their spelling range.
+ //
+ // Unwrap such macro calls. If the target file has A(B(C)), the
+ // SourceLocation stack of a token inside C shows us the expansion of A first,
+ // then B, then any macros inside C's body, then C itself.
+ // (This is the reverse of the order the PP applies the expansions in).
+ while (First.isMacroID() && Last.isMacroID()) {
+ auto DecFirst = SM.getDecomposedLoc(First);
+ auto DecLast = SM.getDecomposedLoc(Last);
+ auto &ExpFirst = SM.getSLocEntry(DecFirst.first).getExpansion();
+ auto &ExpLast = SM.getSLocEntry(DecLast.first).getExpansion();
+
+ if (!ExpFirst.isMacroArgExpansion() || !ExpLast.isMacroArgExpansion())
+ break;
+ // Locations are in the same macro arg if they expand to the same place.
+ // (They may still have
diff erent FileIDs - an arg can have >1 chunks!)
+ if (ExpFirst.getExpansionLocStart() != ExpLast.getExpansionLocStart())
break;
- // Do not continue into macro bodies.
- if (!ExpInfoFirst.isMacroArgExpansion() ||
- !ExpInfoLast.isMacroArgExpansion())
+ // Careful, given:
+ // #define HIDE ID(ID(a))
+ // ID(ID(HIDE))
+ // The token `a` is wrapped in 4 arg-expansions, we only want to unwrap 2.
+ // We distinguish them by whether the macro expands into the target file.
+ // Fortunately, the target file ones will always appear first.
+ auto &ExpMacro =
+ SM.getSLocEntry(SM.getFileID(ExpFirst.getExpansionLocStart()))
+ .getExpansion();
+ if (ExpMacro.getExpansionLocStart().isMacroID())
break;
- FirstLoc = SM.getImmediateSpellingLoc(FirstLoc);
- LastLoc = SM.getImmediateSpellingLoc(LastLoc);
- // Update the result afterwards, as we want the tokens that triggered the
- // expansion.
- Res = {FirstLoc, LastLoc};
+ // Replace each endpoint with its spelling inside the macro arg.
+ // (This is getImmediateSpellingLoc without repeating lookups).
+ First = ExpFirst.getSpellingLoc().getLocWithOffset(DecFirst.second);
+ Last = ExpLast.getSpellingLoc().getLocWithOffset(DecLast.second);
+
+ // Now: how do we adjust the previous/next bounds? Three cases:
+ // A) If they are also part of the same macro arg, we translate them too.
+ // This will ensure that we don't select any macros nested within the
+ // macro arg that cover extra tokens. Critical case:
+ // #define ID(X) X
+ // ID(prev target) // selecting 'target' succeeds
+ // #define LARGE ID(prev target)
+ // LARGE // selecting 'target' fails.
+ // B) They are not in the macro at all, then their expansion range is a
+ // sibling to it, and we can safely substitute that.
+ // #define PREV prev
+ // #define ID(X) X
+ // PREV ID(target) // selecting 'target' succeeds.
+ // #define LARGE PREV ID(target)
+ // LARGE // selecting 'target' fails.
+ // C) They are in a
diff erent arg of this macro, or the macro body.
+ // Now selecting the whole macro arg is fine, but the whole macro is not.
+ // Model this by setting using the edge of the macro call as the bound.
+ // #define ID2(X, Y) X Y
+ // ID2(prev, target) // selecting 'target' succeeds
+ // #define LARGE ID2(prev, target)
+ // LARGE // selecting 'target' fails
+ auto AdjustBound = [&](SourceLocation &Bound) {
+ if (Bound.isInvalid() || !Bound.isMacroID()) // Non-macro must be case B.
+ return;
+ auto DecBound = SM.getDecomposedLoc(Bound);
+ auto &ExpBound = SM.getSLocEntry(DecBound.first).getExpansion();
+ if (ExpBound.isMacroArgExpansion() &&
+ ExpBound.getExpansionLocStart() == ExpFirst.getExpansionLocStart()) {
+ // Case A: translate to (spelling) loc within the macro arg.
+ Bound = ExpBound.getSpellingLoc().getLocWithOffset(DecBound.second);
+ return;
+ }
+ while (Bound.isMacroID()) {
+ SourceRange Exp = SM.getImmediateExpansionRange(Bound).getAsRange();
+ if (Exp.getBegin() == ExpMacro.getExpansionLocStart()) {
+ // Case B: bounds become the macro call itself.
+ Bound = (&Bound == &Prev) ? Exp.getBegin() : Exp.getEnd();
+ return;
+ }
+ // Either case C, or expansion location will later find case B.
+ // We choose the upper bound for Prev and the lower one for Next:
+ // ID(prev) target ID(next)
+ // ^ ^
+ // new-prev new-next
+ Bound = (&Bound == &Prev) ? Exp.getEnd() : Exp.getBegin();
+ }
+ };
+ AdjustBound(Prev);
+ AdjustBound(Next);
}
- // Normally mapping back to expansion location here only changes FileID, as
- // we've already found some tokens expanded from the same macro argument, and
- // they should map to a consecutive subset of spelled tokens. Unfortunately
- // SourceManager::isBeforeInTranslationUnit discriminates sourcelocations
- // based on their FileID in addition to offsets. So even though we are
- // referring to same tokens, SourceManager might tell us that one is before
- // the other if they've got
diff erent FileIDs.
- return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange();
+
+ // In all remaining cases we need the full containing macros.
+ // If this overlaps Prev or Next, then no range is possible.
+ SourceRange Candidate =
+ SM.getExpansionRange(SourceRange(First, Last)).getAsRange();
+ auto DecFirst = SM.getDecomposedExpansionLoc(Candidate.getBegin());
+ auto DecLast = SM.getDecomposedLoc(Candidate.getEnd());
+ // Can end up in the wrong file due to bad input or token-pasting shenanigans.
+ if (Candidate.isInvalid() || DecFirst.first != TargetFile || DecLast.first != TargetFile)
+ return SourceRange();
+ // Check bounds, which may still be inside macros.
+ if (Prev.isValid()) {
+ auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Prev).getBegin());
+ if (Dec.first != DecFirst.first || Dec.second >= DecFirst.second)
+ return SourceRange();
+ }
+ if (Next.isValid()) {
+ auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Next).getEnd());
+ if (Dec.first != DecLast.first || Dec.second <= DecLast.second)
+ return SourceRange();
+ }
+ // Now we know that Candidate is a file range that covers [First, Last]
+ // without encroaching on {Prev, Next}. Ship it!
+ return Candidate;
}
} // namespace
@@ -363,51 +458,48 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
// of the range, bail out in that case.
if (Expanded.empty())
return llvm::None;
+ const syntax::Token *First = &Expanded.front();
+ const syntax::Token *Last = &Expanded.back();
+ auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First);
+ auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last);
- const syntax::Token *BeginSpelled;
- const Mapping *BeginMapping;
- std::tie(BeginSpelled, BeginMapping) =
- spelledForExpandedToken(&Expanded.front());
-
- const syntax::Token *LastSpelled;
- const Mapping *LastMapping;
- std::tie(LastSpelled, LastMapping) =
- spelledForExpandedToken(&Expanded.back());
-
- FileID FID = SourceMgr->getFileID(BeginSpelled->location());
+ FileID FID = SourceMgr->getFileID(FirstSpelled->location());
// FIXME: Handle multi-file changes by trying to map onto a common root.
if (FID != SourceMgr->getFileID(LastSpelled->location()))
return llvm::None;
const MarkedFile &File = Files.find(FID)->second;
- // If both tokens are coming from a macro argument expansion, try and map to
- // smallest part of the macro argument. BeginMapping && LastMapping check is
- // only for performance, they are a prerequisite for Expanded.front() and
- // Expanded.back() being part of a macro arg expansion.
- if (BeginMapping && LastMapping &&
- SourceMgr->isMacroArgExpansion(Expanded.front().location()) &&
- SourceMgr->isMacroArgExpansion(Expanded.back().location())) {
- auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(),
- Expanded.back(), *SourceMgr);
- // It might be the case that tokens are arguments of
diff erent macro calls,
- // in that case we should continue with the logic below instead of returning
- // an empty range.
- if (CommonRange.isValid())
- return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr);
+ // If the range is within one macro argument, the result may be only part of a
+ // Mapping. We must use the general (SourceManager-based) algorithm.
+ if (FirstMapping && FirstMapping == LastMapping &&
+ SourceMgr->isMacroArgExpansion(First->location()) &&
+ SourceMgr->isMacroArgExpansion(Last->location())) {
+ // We use excluded Prev/Next token for bounds checking.
+ SourceLocation Prev = (First == &ExpandedTokens.front())
+ ? SourceLocation()
+ : (First - 1)->location();
+ SourceLocation Next = (Last == &ExpandedTokens.back())
+ ? SourceLocation()
+ : (Last + 1)->location();
+ SourceRange Range = spelledForExpandedSlow(
+ First->location(), Last->location(), Prev, Next, FID, *SourceMgr);
+ if (Range.isInvalid())
+ return llvm::None;
+ return getTokensCovering(File.SpelledTokens, Range, *SourceMgr);
}
+ // Otherwise, use the fast version based on Mappings.
// Do not allow changes that doesn't cover full expansion.
- unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
- unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
- if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded)
+ unsigned FirstExpanded = Expanded.begin() - ExpandedTokens.data();
+ unsigned LastExpanded = Expanded.end() - ExpandedTokens.data();
+ if (FirstMapping && FirstExpanded != FirstMapping->BeginExpanded)
return llvm::None;
- if (LastMapping && LastMapping->EndExpanded != EndExpanded)
+ if (LastMapping && LastMapping->EndExpanded != LastExpanded)
return llvm::None;
- // All is good, return the result.
return llvm::makeArrayRef(
- BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled
- : BeginSpelled,
+ FirstMapping ? File.SpelledTokens.data() + FirstMapping->BeginSpelled
+ : FirstSpelled,
LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
: LastSpelled + 1);
}
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index 77f719ce28cd0..85fc837fbe2be 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -743,6 +743,62 @@ TEST_F(TokenBufferTest, SpelledByExpanded) {
ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )"))));
// Should fail, spans multiple invocations.
EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")), llvm::None);
+
+ // https://github.com/clangd/clangd/issues/1289
+ recordTokens(R"cpp(
+ #define FOO(X) foo(X)
+ #define INDIRECT FOO(y)
+ INDIRECT // expands to foo(y)
+ )cpp");
+ EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("y")), llvm::None);
+
+ recordTokens(R"cpp(
+ #define FOO(X) a X b
+ FOO(y)
+ )cpp");
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("y")),
+ ValueIs(SameRange(findSpelled("y"))));
+
+ recordTokens(R"cpp(
+ #define ID(X) X
+ #define BAR ID(1)
+ BAR
+ )cpp");
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1")),
+ ValueIs(SameRange(findSpelled(") BAR").drop_front())));
+
+ // Critical cases for mapping of Prev/Next in spelledForExpandedSlow.
+ recordTokens(R"cpp(
+ #define ID(X) X
+ ID(prev ID(good))
+ #define LARGE ID(prev ID(bad))
+ LARGE
+ )cpp");
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
+ ValueIs(SameRange(findSpelled("good"))));
+ EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), llvm::None);
+
+ recordTokens(R"cpp(
+ #define PREV prev
+ #define ID(X) X
+ PREV ID(good)
+ #define LARGE PREV ID(bad)
+ LARGE
+ )cpp");
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
+ ValueIs(SameRange(findSpelled("good"))));
+ EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), llvm::None);
+
+ recordTokens(R"cpp(
+ #define ID(X) X
+ #define ID2(X, Y) X Y
+ ID2(prev, ID(good))
+ #define LARGE ID2(prev, bad)
+ LARGE
+ )cpp");
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
+ ValueIs(SameRange(findSpelled("good"))));
+ EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), llvm::None);
}
TEST_F(TokenBufferTest, ExpandedTokensForRange) {
More information about the cfe-commits
mailing list