[clang] 1bf055c - [Syntax] Add mapping from spelled to expanded tokens for TokenBuffer
Dmitri Gribenko via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 7 06:10:55 PDT 2020
Author: Marcel Hlopko
Date: 2020-04-07T15:07:16+02:00
New Revision: 1bf055c9891f1a5ab2ff6a04348bd83fcc0a9cde
URL: https://github.com/llvm/llvm-project/commit/1bf055c9891f1a5ab2ff6a04348bd83fcc0a9cde
DIFF: https://github.com/llvm/llvm-project/commit/1bf055c9891f1a5ab2ff6a04348bd83fcc0a9cde.diff
LOG: [Syntax] Add mapping from spelled to expanded tokens for TokenBuffer
Summary:
Same restrictions apply as in the other direction: macro arguments are
not supported yet, only full macro expansions can be mapped.
Taking over from https://reviews.llvm.org/D72581.
Reviewers: gribozavr2, sammccall
Reviewed By: gribozavr2
Subscribers: cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D77209
Added:
Modified:
clang/include/clang/Tooling/Syntax/Tokens.h
clang/lib/Tooling/Syntax/Tokens.cpp
clang/unittests/Tooling/Syntax/TokensTest.cpp
clang/unittests/Tooling/Syntax/TreeTest.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h
index 7e50892284f4..a7f9369ddfff 100644
--- a/clang/include/clang/Tooling/Syntax/Tokens.h
+++ b/clang/include/clang/Tooling/Syntax/Tokens.h
@@ -171,7 +171,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T);
/// To build a token buffer use the TokenCollector class. You can also compute
/// the spelled tokens of a file using the tokenize() helper.
///
-/// FIXME: allow to map from spelled to expanded tokens when use-case shows up.
/// FIXME: allow mappings into macro arguments.
class TokenBuffer {
public:
@@ -228,6 +227,36 @@ class TokenBuffer {
llvm::Optional<llvm::ArrayRef<syntax::Token>>
spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const;
+ /// Find the subranges of expanded tokens, corresponding to \p Spelled.
+ ///
+ /// Some spelled tokens may not be present in the expanded token stream, so
+ /// this function can return an empty vector, e.g. for tokens of macro
+ /// directives or disabled preprocessor branches.
+ ///
+ /// Some spelled tokens can be duplicated in the expanded token stream
+ /// multiple times and this function will return multiple results in those
+ /// cases. This happens when \p Spelled is inside a macro argument.
+ ///
+ /// FIXME: return correct results on macro arguments. For now, we return an
+ /// empty list.
+ ///
+ /// (!) will return empty vector on tokens from #define body:
+ /// E.g. for the following example:
+ ///
+ /// #define FIRST(A) f1 A = A f2
+ /// #define SECOND s
+ ///
+ /// a FIRST(arg) b SECOND c // expanded tokens are: a f1 arg = arg f2 b s
+ /// The results would be
+ /// spelled => expanded
+ /// ------------------------
+ /// #define FIRST => {}
+ /// a FIRST(arg) => {a f1 arg = arg f2}
+ /// arg => {arg, arg} // arg #1 is before `=` and arg #2 is
+ /// // after `=` in the expanded tokens.
+ llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
+ expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const;
+
/// An expansion produced by the preprocessor, includes macro expansions and
/// preprocessor directives. Preprocessor always maps a non-empty range of
/// spelled tokens to a (possibly empty) range of expanded tokens. Here is a
@@ -317,6 +346,12 @@ class TokenBuffer {
std::pair<const syntax::Token *, const Mapping *>
spelledForExpandedToken(const syntax::Token *Expanded) const;
+ /// Returns a mapping starting before \p Spelled token, or nullptr if no
+ /// such mapping exists.
+ static const Mapping *
+ mappingStartingBeforeSpelled(const MarkedFile &F,
+ const syntax::Token *Spelled);
+
/// Token stream produced after preprocessing, conceputally this captures the
/// same stream as 'clang -E' (excluding the preprocessor directives like
/// #file, etc.).
diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
index f214afb5c85e..1464d97d037d 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -213,19 +213,109 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
// Our token could only be produced by the previous mapping.
if (It == File.Mappings.begin()) {
// No previous mapping, no need to modify offsets.
- return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr};
+ return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
+ /*Mapping=*/nullptr};
}
--It; // 'It' now points to last mapping that started before our token.
// Check if the token is part of the mapping.
if (ExpandedIndex < It->EndExpanded)
- return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It};
+ return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};
// Not part of the mapping, use the index from previous mapping to compute the
// corresponding spelled token.
return {
&File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
- /*Mapping*/ nullptr};
+ /*Mapping=*/nullptr};
+}
+
+const TokenBuffer::Mapping *
+TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
+ const syntax::Token *Spelled) {
+ assert(F.SpelledTokens.data() <= Spelled);
+ unsigned SpelledI = Spelled - F.SpelledTokens.data();
+ assert(SpelledI < F.SpelledTokens.size());
+
+ auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
+ return M.BeginSpelled <= SpelledI;
+ });
+ if (It == F.Mappings.begin())
+ return nullptr;
+ --It;
+ return &*It;
+}
+
+llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
+TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
+ if (Spelled.empty())
+ return {};
+ assert(Spelled.front().location().isFileID());
+
+ auto FID = sourceManager().getFileID(Spelled.front().location());
+ auto It = Files.find(FID);
+ assert(It != Files.end());
+
+ const MarkedFile &File = It->second;
+ // `Spelled` must be a subrange of `File.SpelledTokens`.
+ assert(File.SpelledTokens.data() <= Spelled.data());
+ assert(&Spelled.back() <=
+ File.SpelledTokens.data() + File.SpelledTokens.size());
+#ifndef NDEBUG
+ auto T1 = Spelled.back().location();
+ auto T2 = File.SpelledTokens.back().location();
+ assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
+#endif
+
+ auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
+ unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
+ assert(SpelledFrontI < File.SpelledTokens.size());
+ unsigned ExpandedBegin;
+ if (!FrontMapping) {
+ // No mapping that starts before the first token of Spelled, we don't have
+ // to modify offsets.
+ ExpandedBegin = File.BeginExpanded + SpelledFrontI;
+ } else if (SpelledFrontI < FrontMapping->EndSpelled) {
+ // This mapping applies to Spelled tokens.
+ if (SpelledFrontI != FrontMapping->BeginSpelled) {
+ // Spelled tokens don't cover the entire mapping, returning empty result.
+ return {}; // FIXME: support macro arguments.
+ }
+ // Spelled tokens start at the beginning of this mapping.
+ ExpandedBegin = FrontMapping->BeginExpanded;
+ } else {
+ // Spelled tokens start after the mapping ends (they start in the hole
+ // between 2 mappings, or between a mapping and end of the file).
+ ExpandedBegin =
+ FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
+ }
+
+ auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
+ unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
+ unsigned ExpandedEnd;
+ if (!BackMapping) {
+ // No mapping that starts before the last token of Spelled, we don't have to
+ // modify offsets.
+ ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
+ } else if (SpelledBackI < BackMapping->EndSpelled) {
+ // This mapping applies to Spelled tokens.
+ if (SpelledBackI + 1 != BackMapping->EndSpelled) {
+ // Spelled tokens don't cover the entire mapping, returning empty result.
+ return {}; // FIXME: support macro arguments.
+ }
+ ExpandedEnd = BackMapping->EndExpanded;
+ } else {
+ // Spelled tokens end after the mapping ends.
+ ExpandedEnd =
+ BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
+ }
+
+ assert(ExpandedBegin < ExpandedTokens.size());
+ assert(ExpandedEnd < ExpandedTokens.size());
+ // Avoid returning empty ranges.
+ if (ExpandedBegin == ExpandedEnd)
+ return {};
+ return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
+ ExpandedTokens.data() + ExpandedEnd)};
}
llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
@@ -550,6 +640,20 @@ class TokenCollector::Builder {
for (const auto &File : Result.Files)
discard(File.first);
+#ifndef NDEBUG
+ for (auto &pair : Result.Files) {
+ auto &mappings = pair.second.Mappings;
+ assert(std::is_sorted(
+ mappings.begin(), mappings.end(),
+ [](const TokenBuffer::Mapping &M1, const TokenBuffer::Mapping &M2) {
+ return M1.BeginSpelled < M2.BeginSpelled &&
+ M1.EndSpelled < M2.EndSpelled &&
+ M1.BeginExpanded < M2.BeginExpanded &&
+ M1.EndExpanded < M2.EndExpanded;
+ }));
+ }
+#endif
+
return std::move(Result);
}
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index a89d67fc6a76..453ab608e659 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -57,6 +57,7 @@ using ::testing::AllOf;
using ::testing::Contains;
using ::testing::ElementsAre;
using ::testing::Field;
+using ::testing::IsEmpty;
using ::testing::Matcher;
using ::testing::Not;
using ::testing::Pointee;
@@ -185,10 +186,14 @@ class TokenCollectorTest : public ::testing::Test {
template <class T, class U, class Eq>
llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
llvm::ArrayRef<T> Range, Eq F) {
- for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
+ assert(Subrange.size() >= 1);
+ if (Range.size() < Subrange.size())
+ return llvm::makeArrayRef(Range.end(), Range.end());
+ for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
+ Begin <= Last; ++Begin) {
auto It = Begin;
- for (auto ItSub = Subrange.begin();
- ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
+ for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
+ ++ItSub, ++It) {
if (!F(*ItSub, *It))
goto continue_outer;
}
@@ -906,4 +911,111 @@ TEST_F(TokenBufferTest, Touching) {
ASSERT_EQ(Code.points().size(), 8u);
}
+TEST_F(TokenBufferTest, ExpandedBySpelled) {
+ recordTokens(R"cpp(
+ a1 a2 a3 b1 b2
+ )cpp");
+ // Sanity check: expanded and spelled tokens are stored separately.
+ EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
+ // Searching for subranges of expanded tokens should give the corresponding
+ // spelled ones.
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
+ ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")),
+ ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")),
+ ElementsAre(SameRange(findExpanded("b1 b2"))));
+
+ // Test search on simple macro expansions.
+ recordTokens(R"cpp(
+ #define A a1 a2 a3
+ #define B b1 b2
+
+ A split B
+ )cpp");
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")),
+ ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()),
+ ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()),
+ ElementsAre(SameRange(findExpanded("b1 b2"))));
+
+ // Ranges not fully covering macro expansions should fail.
+ recordTokens(R"cpp(
+ #define ID(x) x
+
+ ID(a)
+ )cpp");
+ // Spelled don't cover entire mapping (missing ID token) -> empty result
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty());
+ // Spelled don't cover entire mapping (missing ) token) -> empty result
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty());
+
+ // Recursive macro invocations.
+ recordTokens(R"cpp(
+ #define ID(x) x
+ #define B b1 b2
+
+ ID(ID(ID(a1) a2 a3)) split ID(B)
+ )cpp");
+
+ EXPECT_THAT(
+ Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
+ ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")),
+ ElementsAre(SameRange(findExpanded("b1 b2"))));
+ EXPECT_THAT(Buffer.expandedForSpelled(
+ findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
+ ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
+ // FIXME: these should succeed, but we do not support macro arguments yet.
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty());
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
+ IsEmpty());
+
+ // Empty macro expansions.
+ recordTokens(R"cpp(
+ #define EMPTY
+ #define ID(X) X
+
+ EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
+ EMPTY EMPTY ID(4 5 6) split2
+ ID(7 8 9) EMPTY EMPTY
+ )cpp");
+ // Covered by empty expansions on one of both of the sides.
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
+ ElementsAre(SameRange(findExpanded("1 2 3"))));
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
+ ElementsAre(SameRange(findExpanded("4 5 6"))));
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
+ ElementsAre(SameRange(findExpanded("7 8 9"))));
+ // Including the empty macro expansions on the side.
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
+ ElementsAre(SameRange(findExpanded("1 2 3"))));
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
+ ElementsAre(SameRange(findExpanded("1 2 3"))));
+ EXPECT_THAT(
+ Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
+ ElementsAre(SameRange(findExpanded("1 2 3"))));
+
+ // Empty mappings coming from various directives.
+ recordTokens(R"cpp(
+ #define ID(X) X
+ ID(1)
+ #pragma lalala
+ not_mapped
+ )cpp");
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")),
+ IsEmpty());
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")),
+ IsEmpty());
+
+ // Empty macro expansion.
+ recordTokens(R"cpp(
+ #define EMPTY
+ EMPTY int a = 100;
+ )cpp");
+ EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
+ IsEmpty());
+}
+
} // namespace
diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp
index 823045748f99..d11a3652c8e1 100644
--- a/clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -1215,6 +1215,9 @@ void test() {
| `-}
`-}
)txt");
+}
+
+TEST_F(SyntaxTreeTest, ModifiableNodes) {
// All nodes can be mutated.
expectTreeDumpEqual(
R"cpp(
More information about the cfe-commits
mailing list