[clang-tools-extra] r341208 - [clangd] Collect symbol occurrences in SymbolCollector.

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 31 05:54:13 PDT 2018


Author: hokein
Date: Fri Aug 31 05:54:13 2018
New Revision: 341208

URL: http://llvm.org/viewvc/llvm-project?rev=341208&view=rev
Log:
[clangd] Collect symbol occurrences in SymbolCollector.

SymbolCollector will be used for two cases:
 - collect Symbol type only, used for indexing preamble AST.
 - collect Symbol and SymbolOccurrences, used for indexing main AST.

For finding local references from the AST, we will implement it in other ways.

Modified:
    clang-tools-extra/trunk/clangd/index/Index.cpp
    clang-tools-extra/trunk/clangd/index/Index.h
    clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp
    clang-tools-extra/trunk/clangd/index/SymbolCollector.h
    clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp

Modified: clang-tools-extra/trunk/clangd/index/Index.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.cpp?rev=341208&r1=341207&r2=341208&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Index.cpp Fri Aug 31 05:54:13 2018
@@ -128,5 +128,48 @@ SymbolSlab SymbolSlab::Builder::build()
   return SymbolSlab(std::move(NewArena), std::move(Symbols));
 }
 
+raw_ostream &operator<<(raw_ostream &OS, SymbolOccurrenceKind K) {
+  if (K == SymbolOccurrenceKind::Unknown)
+    return OS << "Unknown";
+  static const std::vector<const char *> Messages = {"Decl", "Def", "Ref"};
+  bool VisitedOnce = false;
+  for (unsigned I = 0; I < Messages.size(); ++I) {
+    if (static_cast<uint8_t>(K) & 1u << I) {
+      if (VisitedOnce)
+        OS << ", ";
+      OS << Messages[I];
+      VisitedOnce = true;
+    }
+  }
+  return OS;
+}
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                              const SymbolOccurrence &Occurrence) {
+  OS << Occurrence.Location << ":" << Occurrence.Kind;
+  return OS;
+}
+
+void SymbolOccurrenceSlab::insert(const SymbolID &SymID,
+                                  const SymbolOccurrence &Occurrence) {
+  assert(!Frozen &&
+         "Can't insert a symbol occurrence after the slab has been frozen!");
+  auto &SymOccurrences = Occurrences[SymID];
+  SymOccurrences.push_back(Occurrence);
+  SymOccurrences.back().Location.FileURI =
+      UniqueStrings.save(Occurrence.Location.FileURI);
+}
+
+void SymbolOccurrenceSlab::freeze() {
+  // Deduplicate symbol occurrenes.
+  for (auto &IDAndOccurrence : Occurrences) {
+    auto &Occurrence = IDAndOccurrence.getSecond();
+    std::sort(Occurrence.begin(), Occurrence.end());
+    Occurrence.erase(std::unique(Occurrence.begin(), Occurrence.end()),
+                     Occurrence.end());
+  }
+  Frozen = true;
+}
+
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/clangd/index/Index.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.h?rev=341208&r1=341207&r2=341208&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.h (original)
+++ clang-tools-extra/trunk/clangd/index/Index.h Fri Aug 31 05:54:13 2018
@@ -32,9 +32,6 @@ struct SymbolLocation {
     uint32_t Line = 0; // 0-based
     // Using UTF-16 code units.
     uint32_t Column = 0; // 0-based
-    bool operator==(const Position& P) const {
-      return Line == P.Line && Column == P.Column;
-    }
   };
 
   // The URI of the source file where a symbol occurs.
@@ -45,11 +42,23 @@ struct SymbolLocation {
   Position End;
 
   explicit operator bool() const { return !FileURI.empty(); }
-  bool operator==(const SymbolLocation& Loc) const {
-    return std::tie(FileURI, Start, End) ==
-           std::tie(Loc.FileURI, Loc.Start, Loc.End);
-  }
 };
+inline bool operator==(const SymbolLocation::Position &L,
+                       const SymbolLocation::Position &R) {
+  return std::tie(L.Line, L.Column) == std::tie(R.Line, R.Column);
+}
+inline bool operator<(const SymbolLocation::Position &L,
+                      const SymbolLocation::Position &R) {
+  return std::tie(L.Line, L.Column) < std::tie(R.Line, R.Column);
+}
+inline bool operator==(const SymbolLocation &L, const SymbolLocation &R) {
+  return std::tie(L.FileURI, L.Start, L.End) ==
+         std::tie(R.FileURI, R.Start, R.End);
+}
+inline bool operator<(const SymbolLocation &L, const SymbolLocation &R) {
+  return std::tie(L.FileURI, L.Start, L.End) <
+         std::tie(R.FileURI, R.Start, R.End);
+}
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &);
 
 // The class identifies a particular C++ symbol (class, function, method, etc).
@@ -314,6 +323,9 @@ inline SymbolOccurrenceKind operator&(Sy
   return static_cast<SymbolOccurrenceKind>(static_cast<uint8_t>(A) &
                                            static_cast<uint8_t>(B));
 }
+static const SymbolOccurrenceKind AllOccurrenceKinds =
+    SymbolOccurrenceKind::Declaration | SymbolOccurrenceKind::Definition |
+    SymbolOccurrenceKind::Reference;
 
 // Represents a symbol occurrence in the source file. It could be a
 // declaration/definition/reference occurrence.
@@ -324,6 +336,61 @@ struct SymbolOccurrence {
   SymbolLocation Location;
   SymbolOccurrenceKind Kind = SymbolOccurrenceKind::Unknown;
 };
+inline bool operator<(const SymbolOccurrence &L, const SymbolOccurrence &R) {
+  return std::tie(L.Location, L.Kind) < std::tie(R.Location, R.Kind);
+}
+inline bool operator==(const SymbolOccurrence &L, const SymbolOccurrence &R) {
+  return std::tie(L.Location, L.Kind) == std::tie(R.Location, R.Kind);
+}
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                              const SymbolOccurrence &Occurrence);
+
+// An efficient structure of storing large set of symbol occurrences in memory.
+// Filenames are deduplicated.
+class SymbolOccurrenceSlab {
+public:
+  using const_iterator =
+      llvm::DenseMap<SymbolID, std::vector<SymbolOccurrence>>::const_iterator;
+  using iterator = const_iterator;
+
+  SymbolOccurrenceSlab() : UniqueStrings(Arena) {}
+
+  // Define move semantics for the slab, allowing assignment from an rvalue.
+  // Implicit move assignment is deleted by the compiler because
+  // StringSaver has a reference type member.
+  SymbolOccurrenceSlab(SymbolOccurrenceSlab &&Slab) = default;
+  SymbolOccurrenceSlab &operator=(SymbolOccurrenceSlab &&RHS) {
+    assert(RHS.Frozen &&
+           "SymbolOcucrrenceSlab must be frozen when move assigned!");
+    Arena = std::move(RHS.Arena);
+    Frozen = true;
+    Occurrences = std::move(RHS.Occurrences);
+    return *this;
+  }
+
+  const_iterator begin() const { return Occurrences.begin(); }
+  const_iterator end() const { return Occurrences.end(); }
+
+  // Adds a symbol occurrence.
+  // This is a deep copy: underlying FileURI will be owned by the slab.
+  void insert(const SymbolID &SymID, const SymbolOccurrence &Occurrence);
+
+  llvm::ArrayRef<SymbolOccurrence> find(const SymbolID &ID) const {
+    assert(Frozen && "SymbolOccurrenceSlab must be frozen before looking up!");
+    auto It = Occurrences.find(ID);
+    if (It == Occurrences.end())
+      return {};
+    return It->second;
+  }
+
+  void freeze();
+
+private:
+  bool Frozen = false;
+  llvm::BumpPtrAllocator Arena;
+  llvm::UniqueStringSaver UniqueStrings;
+  llvm::DenseMap<SymbolID, std::vector<SymbolOccurrence>> Occurrences;
+};
 
 struct FuzzyFindRequest {
   /// \brief A query string for the fuzzy find. This is matched against symbols'

Modified: clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp?rev=341208&r1=341207&r2=341208&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp Fri Aug 31 05:54:13 2018
@@ -182,7 +182,24 @@ getIncludeHeader(llvm::StringRef QName,
   return toURI(SM, Header, Opts);
 }
 
-// Return the symbol location of the token at \p Loc.
+// Return the symbol range of the token at \p TokLoc.
+std::pair<SymbolLocation::Position, SymbolLocation::Position>
+getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
+              const LangOptions &LangOpts) {
+  auto CreatePosition = [&SM](SourceLocation Loc) {
+    auto LSPLoc = sourceLocToPosition(SM, Loc);
+    SymbolLocation::Position Pos;
+    Pos.Line = LSPLoc.line;
+    Pos.Column = LSPLoc.character;
+    return Pos;
+  };
+
+  auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
+  return {CreatePosition(TokLoc),
+          CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
+}
+
+// Return the symbol location of the token at \p TokLoc.
 llvm::Optional<SymbolLocation>
 getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
                  const SymbolCollector::Options &Opts,
@@ -194,19 +211,9 @@ getTokenLocation(SourceLocation TokLoc,
   FileURIStorage = std::move(*U);
   SymbolLocation Result;
   Result.FileURI = FileURIStorage;
-  auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
-
-  auto CreatePosition = [&SM](SourceLocation Loc) {
-    auto LSPLoc = sourceLocToPosition(SM, Loc);
-    SymbolLocation::Position Pos;
-    Pos.Line = LSPLoc.line;
-    Pos.Column = LSPLoc.character;
-    return Pos;
-  };
-
-  Result.Start = CreatePosition(TokLoc);
-  auto EndLoc = TokLoc.getLocWithOffset(TokenLength);
-  Result.End = CreatePosition(EndLoc);
+  auto Range = getTokenRange(TokLoc, SM, LangOpts);
+  Result.Start = Range.first;
+  Result.End = Range.second;
 
   return std::move(Result);
 }
@@ -224,6 +231,11 @@ bool isPreferredDeclaration(const NamedD
          match(decl(isExpansionInMainFile()), ND, ND.getASTContext()).empty();
 }
 
+SymbolOccurrenceKind toOccurrenceKind(index::SymbolRoleSet Roles) {
+  return static_cast<SymbolOccurrenceKind>(
+      static_cast<unsigned>(AllOccurrenceKinds) & Roles);
+}
+
 } // namespace
 
 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
@@ -308,11 +320,16 @@ bool SymbolCollector::handleDeclOccurenc
   // Mark D as referenced if this is a reference coming from the main file.
   // D may not be an interesting symbol, but it's cheaper to check at the end.
   auto &SM = ASTCtx->getSourceManager();
+  auto SpellingLoc = SM.getSpellingLoc(Loc);
   if (Opts.CountReferences &&
       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
-      SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
+      SM.getFileID(SpellingLoc) == SM.getMainFileID())
     ReferencedDecls.insert(ND);
 
+  if ((static_cast<unsigned>(Opts.OccurrenceFilter) & Roles) &&
+      SM.getFileID(SpellingLoc) == SM.getMainFileID())
+    DeclOccurrences[ND].emplace_back(SpellingLoc, Roles);
+
   // Don't continue indexing if this is a mere reference.
   if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
         Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
@@ -436,8 +453,35 @@ void SymbolCollector::finish() {
           IncRef(SymbolID(USR));
     }
   }
+
+  const auto &SM = ASTCtx->getSourceManager();
+  auto* MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
+
+  if (auto MainFileURI = toURI(SM, MainFileEntry->getName(), Opts)) {
+    std::string MainURI = *MainFileURI;
+    for (const auto &It : DeclOccurrences) {
+      if (auto ID = getSymbolID(It.first)) {
+        if (Symbols.find(*ID)) {
+          for (const auto &LocAndRole : It.second) {
+            SymbolOccurrence Occurrence;
+            auto Range =
+                getTokenRange(LocAndRole.first, SM, ASTCtx->getLangOpts());
+            Occurrence.Location.Start = Range.first;
+            Occurrence.Location.End = Range.second;
+            Occurrence.Location.FileURI = MainURI;
+            Occurrence.Kind = toOccurrenceKind(LocAndRole.second);
+            SymbolOccurrences.insert(*ID, Occurrence);
+          }
+        }
+      }
+    }
+  } else {
+    log("Failed to create URI for main file: {0}", MainFileEntry->getName());
+  }
+
   ReferencedDecls.clear();
   ReferencedMacros.clear();
+  DeclOccurrences.clear();
 }
 
 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND,

Modified: clang-tools-extra/trunk/clangd/index/SymbolCollector.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolCollector.h?rev=341208&r1=341207&r2=341208&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolCollector.h (original)
+++ clang-tools-extra/trunk/clangd/index/SymbolCollector.h Fri Aug 31 05:54:13 2018
@@ -52,6 +52,10 @@ public:
     const CanonicalIncludes *Includes = nullptr;
     // Populate the Symbol.References field.
     bool CountReferences = false;
+    /// The symbol occurrence kind that will be collected.
+    /// If not set (Unknown), SymbolCollector will not collect any symbol
+    /// occurrences.
+    SymbolOccurrenceKind OccurrenceFilter = SymbolOccurrenceKind::Unknown;
     // Every symbol collected will be stamped with this origin.
     SymbolOrigin Origin = SymbolOrigin::Unknown;
     /// Collect macros.
@@ -86,6 +90,11 @@ public:
 
   SymbolSlab takeSymbols() { return std::move(Symbols).build(); }
 
+  SymbolOccurrenceSlab takeOccurrences() {
+    SymbolOccurrences.freeze();
+    return std::move(SymbolOccurrences);
+  }
+
   void finish() override;
 
 private:
@@ -94,14 +103,21 @@ private:
 
   // All Symbols collected from the AST.
   SymbolSlab::Builder Symbols;
+  // All symbol occurrences collected from the AST.
+  // Only symbols declared in preamble (from #inclues) and references from the
+  // main file will be included.
+  SymbolOccurrenceSlab SymbolOccurrences;
   ASTContext *ASTCtx;
   std::shared_ptr<Preprocessor> PP;
   std::shared_ptr<GlobalCodeCompletionAllocator> CompletionAllocator;
   std::unique_ptr<CodeCompletionTUInfo> CompletionTUInfo;
   Options Opts;
+  using DeclOccurrence = std::pair<SourceLocation, index::SymbolRoleSet>;
   // Symbols referenced from the current TU, flushed on finish().
   llvm::DenseSet<const NamedDecl *> ReferencedDecls;
   llvm::DenseSet<const IdentifierInfo *> ReferencedMacros;
+  llvm::DenseMap<const NamedDecl *, std::vector<DeclOccurrence>>
+      DeclOccurrences;
   // Maps canonical declaration provided by clang to canonical declaration for
   // an index symbol, if clangd prefers a different declaration than that
   // provided by clang. For example, friend declaration might be considered

Modified: clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp?rev=341208&r1=341207&r2=341208&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp Fri Aug 31 05:54:13 2018
@@ -28,9 +28,15 @@
 #include <memory>
 #include <string>
 
+namespace clang {
+namespace clangd {
+
+namespace {
+
 using testing::AllOf;
 using testing::Eq;
 using testing::Field;
+using testing::IsEmpty;
 using testing::Not;
 using testing::UnorderedElementsAre;
 using testing::UnorderedElementsAreArray;
@@ -74,11 +80,18 @@ MATCHER_P(Refs, R, "") { return int(arg.
 MATCHER_P(ForCodeCompletion, IsIndexedForCodeCompletion, "") {
   return arg.IsIndexedForCodeCompletion == IsIndexedForCodeCompletion;
 }
-
-namespace clang {
-namespace clangd {
-
-namespace {
+MATCHER(OccurrenceRange, "") {
+  const SymbolOccurrence &Pos = testing::get<0>(arg);
+  const Range &Range = testing::get<1>(arg);
+  return std::tie(Pos.Location.Start.Line, Pos.Location.Start.Column,
+                  Pos.Location.End.Line, Pos.Location.End.Column) ==
+         std::tie(Range.start.line, Range.start.character, Range.end.line,
+                  Range.end.character);
+}
+testing::Matcher<const std::vector<SymbolOccurrence> &>
+HaveRanges(const std::vector<Range> Ranges) {
+  return testing::UnorderedPointwise(OccurrenceRange(), Ranges);
+}
 
 class ShouldCollectSymbolTest : public ::testing::Test {
 public:
@@ -237,6 +250,7 @@ public:
                                 llvm::MemoryBuffer::getMemBuffer(MainCode));
     Invocation.run();
     Symbols = Factory->Collector->takeSymbols();
+    SymbolOccurrences = Factory->Collector->takeOccurrences();
     return true;
   }
 
@@ -247,6 +261,7 @@ protected:
   std::string TestFileName;
   std::string TestFileURI;
   SymbolSlab Symbols;
+  SymbolOccurrenceSlab SymbolOccurrences;
   SymbolCollector::Options CollectorOpts;
   std::unique_ptr<CommentHandler> PragmaHandler;
 };
@@ -413,6 +428,59 @@ o]]();
           ));
 }
 
+TEST_F(SymbolCollectorTest, Occurrences) {
+  Annotations Header(R"(
+  class $foo[[Foo]] {
+  public:
+    $foo[[Foo]]() {}
+    $foo[[Foo]](int);
+  };
+  class $bar[[Bar]];
+  void $func[[func]]();
+  )");
+  Annotations Main(R"(
+  class $bar[[Bar]] {};
+
+  void $func[[func]]();
+
+  void fff() {
+    $foo[[Foo]] foo;
+    $bar[[Bar]] bar;
+    $func[[func]]();
+    int abc = 0;
+    $foo[[Foo]] foo2 = abc;
+  }
+  )");
+  Annotations SymbolsOnlyInMainCode(R"(
+  int a;
+  void b() {}
+  static const int c = 0;
+  class d {};
+  )");
+  CollectorOpts.OccurrenceFilter = AllOccurrenceKinds;
+  runSymbolCollector(Header.code(),
+                     (Main.code() + SymbolsOnlyInMainCode.code()).str());
+  auto HeaderSymbols = TestTU::withHeaderCode(Header.code()).headerSymbols();
+
+  EXPECT_THAT(SymbolOccurrences.find(findSymbol(Symbols, "Foo").ID),
+              HaveRanges(Main.ranges("foo")));
+  EXPECT_THAT(SymbolOccurrences.find(findSymbol(Symbols, "Bar").ID),
+              HaveRanges(Main.ranges("bar")));
+  EXPECT_THAT(SymbolOccurrences.find(findSymbol(Symbols, "func").ID),
+              HaveRanges(Main.ranges("func")));
+
+  // Retrieve IDs for symbols *only* in the main file, and verify these symbols
+  // are not collected.
+  auto MainSymbols =
+      TestTU::withHeaderCode(SymbolsOnlyInMainCode.code()).headerSymbols();
+  EXPECT_THAT(SymbolOccurrences.find(findSymbol(MainSymbols, "a").ID),
+              IsEmpty());
+  EXPECT_THAT(SymbolOccurrences.find(findSymbol(MainSymbols, "b").ID),
+              IsEmpty());
+  EXPECT_THAT(SymbolOccurrences.find(findSymbol(MainSymbols, "c").ID),
+              IsEmpty());
+}
+
 TEST_F(SymbolCollectorTest, References) {
   const std::string Header = R"(
     class W;




More information about the cfe-commits mailing list