[clang] 1bf055c - [Syntax] Add mapping from spelled to expanded tokens for TokenBuffer

Dmitri Gribenko via cfe-commits cfe-commits at lists.llvm.org
Tue Apr 7 06:10:55 PDT 2020


Author: Marcel Hlopko
Date: 2020-04-07T15:07:16+02:00
New Revision: 1bf055c9891f1a5ab2ff6a04348bd83fcc0a9cde

URL: https://github.com/llvm/llvm-project/commit/1bf055c9891f1a5ab2ff6a04348bd83fcc0a9cde
DIFF: https://github.com/llvm/llvm-project/commit/1bf055c9891f1a5ab2ff6a04348bd83fcc0a9cde.diff

LOG: [Syntax] Add mapping from spelled to expanded tokens for TokenBuffer

Summary:
Same restrictions apply as in the other direction: macro arguments are
not supported yet, only full macro expansions can be mapped.

Taking over from https://reviews.llvm.org/D72581.

Reviewers: gribozavr2, sammccall

Reviewed By: gribozavr2

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D77209

Added: 
    

Modified: 
    clang/include/clang/Tooling/Syntax/Tokens.h
    clang/lib/Tooling/Syntax/Tokens.cpp
    clang/unittests/Tooling/Syntax/TokensTest.cpp
    clang/unittests/Tooling/Syntax/TreeTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h
index 7e50892284f4..a7f9369ddfff 100644
--- a/clang/include/clang/Tooling/Syntax/Tokens.h
+++ b/clang/include/clang/Tooling/Syntax/Tokens.h
@@ -171,7 +171,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T);
 /// To build a token buffer use the TokenCollector class. You can also compute
 /// the spelled tokens of a file using the tokenize() helper.
 ///
-/// FIXME: allow to map from spelled to expanded tokens when use-case shows up.
 /// FIXME: allow mappings into macro arguments.
 class TokenBuffer {
 public:
@@ -228,6 +227,36 @@ class TokenBuffer {
   llvm::Optional<llvm::ArrayRef<syntax::Token>>
   spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const;
 
+  /// Find the subranges of expanded tokens, corresponding to \p Spelled.
+  ///
+  /// Some spelled tokens may not be present in the expanded token stream, so
+  /// this function can return an empty vector, e.g. for tokens of macro
+  /// directives or disabled preprocessor branches.
+  ///
+  /// Some spelled tokens can be duplicated in the expanded token stream
+  /// multiple times and this function will return multiple results in those
+  /// cases. This happens when \p Spelled is inside a macro argument.
+  ///
+  /// FIXME: return correct results on macro arguments. For now, we return an
+  ///        empty list.
+  ///
+  /// (!) will return empty vector on tokens from #define body:
+  /// E.g. for the following example:
+  ///
+  ///   #define FIRST(A) f1 A = A f2
+  ///   #define SECOND s
+  ///
+  ///   a FIRST(arg) b SECOND c // expanded tokens are: a f1 arg = arg f2 b s
+  /// The results would be
+  ///   spelled           => expanded
+  ///   ------------------------
+  ///   #define FIRST     => {}
+  ///   a FIRST(arg)      => {a f1 arg = arg f2}
+  ///   arg               => {arg, arg} // arg #1 is before `=` and arg #2 is
+  ///                                   // after `=` in the expanded tokens.
+  llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
+  expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const;
+
   /// An expansion produced by the preprocessor, includes macro expansions and
   /// preprocessor directives. Preprocessor always maps a non-empty range of
   /// spelled tokens to a (possibly empty) range of expanded tokens. Here is a
@@ -317,6 +346,12 @@ class TokenBuffer {
   std::pair<const syntax::Token *, const Mapping *>
   spelledForExpandedToken(const syntax::Token *Expanded) const;
 
+  /// Returns a mapping starting before \p Spelled token, or nullptr if no
+  /// such mapping exists.
+  static const Mapping *
+  mappingStartingBeforeSpelled(const MarkedFile &F,
+                               const syntax::Token *Spelled);
+
   /// Token stream produced after preprocessing, conceputally this captures the
   /// same stream as 'clang -E' (excluding the preprocessor directives like
   /// #file, etc.).

diff  --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
index f214afb5c85e..1464d97d037d 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -213,19 +213,109 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
   // Our token could only be produced by the previous mapping.
   if (It == File.Mappings.begin()) {
     // No previous mapping, no need to modify offsets.
-    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr};
+    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
+            /*Mapping=*/nullptr};
   }
   --It; // 'It' now points to last mapping that started before our token.
 
   // Check if the token is part of the mapping.
   if (ExpandedIndex < It->EndExpanded)
-    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It};
+    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};
 
   // Not part of the mapping, use the index from previous mapping to compute the
   // corresponding spelled token.
   return {
       &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
-      /*Mapping*/ nullptr};
+      /*Mapping=*/nullptr};
+}
+
+const TokenBuffer::Mapping *
+TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
+                                          const syntax::Token *Spelled) {
+  assert(F.SpelledTokens.data() <= Spelled);
+  unsigned SpelledI = Spelled - F.SpelledTokens.data();
+  assert(SpelledI < F.SpelledTokens.size());
+
+  auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
+    return M.BeginSpelled <= SpelledI;
+  });
+  if (It == F.Mappings.begin())
+    return nullptr;
+  --It;
+  return &*It;
+}
+
+llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
+TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
+  if (Spelled.empty())
+    return {};
+  assert(Spelled.front().location().isFileID());
+
+  auto FID = sourceManager().getFileID(Spelled.front().location());
+  auto It = Files.find(FID);
+  assert(It != Files.end());
+
+  const MarkedFile &File = It->second;
+  // `Spelled` must be a subrange of `File.SpelledTokens`.
+  assert(File.SpelledTokens.data() <= Spelled.data());
+  assert(&Spelled.back() <=
+         File.SpelledTokens.data() + File.SpelledTokens.size());
+#ifndef NDEBUG
+  auto T1 = Spelled.back().location();
+  auto T2 = File.SpelledTokens.back().location();
+  assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
+#endif
+
+  auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
+  unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
+  assert(SpelledFrontI < File.SpelledTokens.size());
+  unsigned ExpandedBegin;
+  if (!FrontMapping) {
+    // No mapping that starts before the first token of Spelled, we don't have
+    // to modify offsets.
+    ExpandedBegin = File.BeginExpanded + SpelledFrontI;
+  } else if (SpelledFrontI < FrontMapping->EndSpelled) {
+    // This mapping applies to Spelled tokens.
+    if (SpelledFrontI != FrontMapping->BeginSpelled) {
+      // Spelled tokens don't cover the entire mapping, returning empty result.
+      return {}; // FIXME: support macro arguments.
+    }
+    // Spelled tokens start at the beginning of this mapping.
+    ExpandedBegin = FrontMapping->BeginExpanded;
+  } else {
+    // Spelled tokens start after the mapping ends (they start in the hole
+    // between 2 mappings, or between a mapping and end of the file).
+    ExpandedBegin =
+        FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
+  }
+
+  auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
+  unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
+  unsigned ExpandedEnd;
+  if (!BackMapping) {
+    // No mapping that starts before the last token of Spelled, we don't have to
+    // modify offsets.
+    ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
+  } else if (SpelledBackI < BackMapping->EndSpelled) {
+    // This mapping applies to Spelled tokens.
+    if (SpelledBackI + 1 != BackMapping->EndSpelled) {
+      // Spelled tokens don't cover the entire mapping, returning empty result.
+      return {}; // FIXME: support macro arguments.
+    }
+    ExpandedEnd = BackMapping->EndExpanded;
+  } else {
+    // Spelled tokens end after the mapping ends.
+    ExpandedEnd =
+        BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
+  }
+
+  assert(ExpandedBegin < ExpandedTokens.size());
+  assert(ExpandedEnd < ExpandedTokens.size());
+  // Avoid returning empty ranges.
+  if (ExpandedBegin == ExpandedEnd)
+    return {};
+  return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
+                             ExpandedTokens.data() + ExpandedEnd)};
 }
 
 llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
@@ -550,6 +640,20 @@ class TokenCollector::Builder {
     for (const auto &File : Result.Files)
       discard(File.first);
 
+#ifndef NDEBUG
+    for (auto &pair : Result.Files) {
+      auto &mappings = pair.second.Mappings;
+      assert(std::is_sorted(
+          mappings.begin(), mappings.end(),
+          [](const TokenBuffer::Mapping &M1, const TokenBuffer::Mapping &M2) {
+            return M1.BeginSpelled < M2.BeginSpelled &&
+                   M1.EndSpelled < M2.EndSpelled &&
+                   M1.BeginExpanded < M2.BeginExpanded &&
+                   M1.EndExpanded < M2.EndExpanded;
+          }));
+    }
+#endif
+
     return std::move(Result);
   }
 

diff  --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index a89d67fc6a76..453ab608e659 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -57,6 +57,7 @@ using ::testing::AllOf;
 using ::testing::Contains;
 using ::testing::ElementsAre;
 using ::testing::Field;
+using ::testing::IsEmpty;
 using ::testing::Matcher;
 using ::testing::Not;
 using ::testing::Pointee;
@@ -185,10 +186,14 @@ class TokenCollectorTest : public ::testing::Test {
   template <class T, class U, class Eq>
   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
                                  llvm::ArrayRef<T> Range, Eq F) {
-    for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
+    assert(Subrange.size() >= 1);
+    if (Range.size() < Subrange.size())
+      return llvm::makeArrayRef(Range.end(), Range.end());
+    for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
+         Begin <= Last; ++Begin) {
       auto It = Begin;
-      for (auto ItSub = Subrange.begin();
-           ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
+      for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
+           ++ItSub, ++It) {
         if (!F(*ItSub, *It))
           goto continue_outer;
       }
@@ -906,4 +911,111 @@ TEST_F(TokenBufferTest, Touching) {
   ASSERT_EQ(Code.points().size(), 8u);
 }
 
+TEST_F(TokenBufferTest, ExpandedBySpelled) {
+  recordTokens(R"cpp(
+    a1 a2 a3 b1 b2
+  )cpp");
+  // Sanity check: expanded and spelled tokens are stored separately.
+  EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
+  // Searching for subranges of expanded tokens should give the corresponding
+  // spelled ones.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")),
+              ElementsAre(SameRange(findExpanded("b1 b2"))));
+
+  // Test search on simple macro expansions.
+  recordTokens(R"cpp(
+    #define A a1 a2 a3
+    #define B b1 b2
+
+    A split B
+  )cpp");
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()),
+              ElementsAre(SameRange(findExpanded("b1 b2"))));
+
+  // Ranges not fully covering macro expansions should fail.
+  recordTokens(R"cpp(
+    #define ID(x) x
+
+    ID(a)
+  )cpp");
+  // Spelled don't cover entire mapping (missing ID token) -> empty result
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty());
+  // Spelled don't cover entire mapping (missing ) token) -> empty result
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty());
+
+  // Recursive macro invocations.
+  recordTokens(R"cpp(
+    #define ID(x) x
+    #define B b1 b2
+
+    ID(ID(ID(a1) a2 a3)) split ID(B)
+  )cpp");
+
+  EXPECT_THAT(
+      Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
+      ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")),
+              ElementsAre(SameRange(findExpanded("b1 b2"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(
+                  findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
+  // FIXME: these should succeed, but we do not support macro arguments yet.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty());
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
+              IsEmpty());
+
+  // Empty macro expansions.
+  recordTokens(R"cpp(
+    #define EMPTY
+    #define ID(X) X
+
+    EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
+    EMPTY EMPTY ID(4 5 6) split2
+    ID(7 8 9) EMPTY EMPTY
+  )cpp");
+  // Covered by empty expansions on one of both of the sides.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
+              ElementsAre(SameRange(findExpanded("1 2 3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
+              ElementsAre(SameRange(findExpanded("4 5 6"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
+              ElementsAre(SameRange(findExpanded("7 8 9"))));
+  // Including the empty macro expansions on the side.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
+              ElementsAre(SameRange(findExpanded("1 2 3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
+              ElementsAre(SameRange(findExpanded("1 2 3"))));
+  EXPECT_THAT(
+      Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
+      ElementsAre(SameRange(findExpanded("1 2 3"))));
+
+  // Empty mappings coming from various directives.
+  recordTokens(R"cpp(
+    #define ID(X) X
+    ID(1)
+    #pragma lalala
+    not_mapped
+  )cpp");
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")),
+              IsEmpty());
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")),
+              IsEmpty());
+
+  // Empty macro expansion.
+  recordTokens(R"cpp(
+    #define EMPTY
+    EMPTY int a = 100;
+  )cpp");
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
+              IsEmpty());
+}
+
 } // namespace

diff  --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp
index 823045748f99..d11a3652c8e1 100644
--- a/clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -1215,6 +1215,9 @@ void test() {
     |   `-}
     `-}
        )txt");
+}
+
+TEST_F(SyntaxTreeTest, ModifiableNodes) {
   // All nodes can be mutated.
   expectTreeDumpEqual(
       R"cpp(


        


More information about the cfe-commits mailing list