[clang-tools-extra] r343778 - [clangd] clangd-indexer gathers refs and stores them in index files.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 4 07:09:55 PDT 2018


Author: sammccall
Date: Thu Oct  4 07:09:55 2018
New Revision: 343778

URL: http://llvm.org/viewvc/llvm-project?rev=343778&view=rev
Log:
[clangd] clangd-indexer gathers refs and stores them in index files.

Reviewers: ioeric

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D52531

Modified:
    clang-tools-extra/trunk/clangd/index/IndexAction.cpp
    clang-tools-extra/trunk/clangd/index/IndexAction.h
    clang-tools-extra/trunk/clangd/index/Serialization.cpp
    clang-tools-extra/trunk/clangd/index/Serialization.h
    clang-tools-extra/trunk/clangd/index/YAMLSerialization.cpp
    clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp
    clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp

Modified: clang-tools-extra/trunk/clangd/index/IndexAction.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/IndexAction.cpp?rev=343778&r1=343777&r2=343778&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/IndexAction.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/IndexAction.cpp Thu Oct  4 07:09:55 2018
@@ -13,10 +13,11 @@ public:
   IndexAction(std::shared_ptr<SymbolCollector> C,
               std::unique_ptr<CanonicalIncludes> Includes,
               const index::IndexingOptions &Opts,
-              std::function<void(SymbolSlab)> &SymbolsCallback)
+              std::function<void(SymbolSlab)> SymbolsCallback,
+              std::function<void(RefSlab)> RefsCallback)
       : WrapperFrontendAction(index::createIndexingAction(C, Opts, nullptr)),
-        SymbolsCallback(SymbolsCallback), Collector(C),
-        Includes(std::move(Includes)),
+        SymbolsCallback(SymbolsCallback), RefsCallback(RefsCallback),
+        Collector(C), Includes(std::move(Includes)),
         PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {}
 
   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
@@ -41,10 +42,13 @@ public:
       return;
     }
     SymbolsCallback(Collector->takeSymbols());
+    if (RefsCallback != nullptr)
+      RefsCallback(Collector->takeRefs());
   }
 
 private:
   std::function<void(SymbolSlab)> SymbolsCallback;
+  std::function<void(RefSlab)> RefsCallback;
   std::shared_ptr<SymbolCollector> Collector;
   std::unique_ptr<CanonicalIncludes> Includes;
   std::unique_ptr<CommentHandler> PragmaHandler;
@@ -54,20 +58,23 @@ private:
 
 std::unique_ptr<FrontendAction>
 createStaticIndexingAction(SymbolCollector::Options Opts,
-                           std::function<void(SymbolSlab)> SymbolsCallback) {
+                           std::function<void(SymbolSlab)> SymbolsCallback,
+                           std::function<void(RefSlab)> RefsCallback) {
   index::IndexingOptions IndexOpts;
   IndexOpts.SystemSymbolFilter =
       index::IndexingOptions::SystemSymbolFilterKind::All;
   Opts.CollectIncludePath = true;
   Opts.CountReferences = true;
   Opts.Origin = SymbolOrigin::Static;
+  if (RefsCallback != nullptr)
+    Opts.RefFilter = RefKind::All;
   auto Includes = llvm::make_unique<CanonicalIncludes>();
   addSystemHeadersMapping(Includes.get());
   Opts.Includes = Includes.get();
   return llvm::make_unique<IndexAction>(
       std::make_shared<SymbolCollector>(std::move(Opts)), std::move(Includes),
-      IndexOpts, SymbolsCallback);
-}
+      IndexOpts, SymbolsCallback, RefsCallback);
+};
 
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/clangd/index/IndexAction.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/IndexAction.h?rev=343778&r1=343777&r2=343778&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/IndexAction.h (original)
+++ clang-tools-extra/trunk/clangd/index/IndexAction.h Thu Oct  4 07:09:55 2018
@@ -21,10 +21,13 @@ namespace clangd {
 // Only a subset of SymbolCollector::Options are respected:
 //   - include paths are always collected, and canonicalized appropriately
 //   - references are always counted
+//   - main-file refs are collected (if RefsCallback is non-null)
 //   - the symbol origin is always Static
+// FIXME: refs from headers should also be collected.
 std::unique_ptr<FrontendAction>
 createStaticIndexingAction(SymbolCollector::Options Opts,
-                           std::function<void(SymbolSlab)> SymbolsCallback);
+                           std::function<void(SymbolSlab)> SymbolsCallback,
+                           std::function<void(RefSlab)> RefsCallback);
 
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/clangd/index/Serialization.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.cpp?rev=343778&r1=343777&r2=343778&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Serialization.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Serialization.cpp Thu Oct  4 07:09:55 2018
@@ -298,17 +298,47 @@ Symbol readSymbol(Reader &Data, ArrayRef
   return Sym;
 }
 
+// REFS ENCODING
+// A refs section has data grouped by Symbol. Each symbol has:
+//  - SymbolID: 20 bytes
+//  - NumRefs: varint
+//  - Ref[NumRefs]
+// Fields of Ref are encoded in turn, see implementation.
+
+void writeRefs(const SymbolID &ID, ArrayRef<Ref> Refs,
+               const StringTableOut &Strings, raw_ostream &OS) {
+  OS << ID.raw();
+  writeVar(Refs.size(), OS);
+  for (const auto &Ref : Refs) {
+    OS.write(static_cast<unsigned char>(Ref.Kind));
+    writeLocation(Ref.Location, Strings, OS);
+  }
+}
+
+std::pair<SymbolID, std::vector<Ref>> readRefs(Reader &Data,
+                                               ArrayRef<StringRef> Strings) {
+  std::pair<SymbolID, std::vector<Ref>> Result;
+  Result.first = Data.consumeID();
+  Result.second.resize(Data.consumeVar());
+  for (auto &Ref : Result.second) {
+    Ref.Kind = static_cast<RefKind>(Data.consume8());
+    Ref.Location = readLocation(Data, Strings);
+  }
+  return Result;
+}
+
 // FILE ENCODING
 // A file is a RIFF chunk with type 'CdIx'.
 // It contains the sections:
 //   - meta: version number
 //   - stri: string table
 //   - symb: symbols
+//   - refs: references to symbols
 
 // The current versioning scheme is simple - non-current versions are rejected.
 // If you make a breaking change, bump this version number to invalidate stored
 // data. Later we may want to support some backward compatibility.
-constexpr static uint32_t Version = 4;
+constexpr static uint32_t Version = 5;
 
 Expected<IndexFileIn> readRIFF(StringRef Data) {
   auto RIFF = riff::readFile(Data);
@@ -342,6 +372,18 @@ Expected<IndexFileIn> readRIFF(StringRef
       return makeError("malformed or truncated symbol");
     Result.Symbols = std::move(Symbols).build();
   }
+  if (Chunks.count("refs")) {
+    Reader RefsReader(Chunks.lookup("refs"));
+    RefSlab::Builder Refs;
+    while (!RefsReader.eof()) {
+      auto RefsBundle = readRefs(RefsReader, Strings->Strings);
+      for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
+        Refs.insert(RefsBundle.first, Ref);
+    }
+    if (RefsReader.err())
+      return makeError("malformed or truncated refs");
+    Result.Refs = std::move(Refs).build();
+  }
   return std::move(Result);
 }
 
@@ -363,6 +405,14 @@ void writeRIFF(const IndexFileOut &Data,
     Symbols.emplace_back(Sym);
     visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); });
   }
+  std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
+  if (Data.Refs) {
+    for (const auto &Sym : *Data.Refs) {
+      Refs.emplace_back(Sym);
+      for (auto &Ref : Refs.back().second)
+        Strings.intern(Ref.Location.FileURI);
+    }
+  }
 
   std::string StringSection;
   {
@@ -379,6 +429,16 @@ void writeRIFF(const IndexFileOut &Data,
   }
   RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
 
+  std::string RefsSection;
+  if (Data.Refs) {
+    {
+      raw_string_ostream RefsOS(RefsSection);
+      for (const auto &Sym : Refs)
+        writeRefs(Sym.first, Sym.second, Strings, RefsOS);
+    }
+    RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
+  }
+
   OS << RIFF;
 }
 
@@ -428,6 +488,8 @@ std::unique_ptr<SymbolIndex> loadIndex(l
     if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
       if (I->Symbols)
         Symbols = std::move(*I->Symbols);
+      if (I->Refs)
+        Refs = std::move(*I->Refs);
     } else {
       llvm::errs() << "Bad Index: " << llvm::toString(I.takeError()) << "\n";
       return nullptr;

Modified: clang-tools-extra/trunk/clangd/index/Serialization.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.h?rev=343778&r1=343777&r2=343778&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Serialization.h (original)
+++ clang-tools-extra/trunk/clangd/index/Serialization.h Thu Oct  4 07:09:55 2018
@@ -38,26 +38,29 @@ enum class IndexFileFormat {
 // Holds the contents of an index file that was read.
 struct IndexFileIn {
   llvm::Optional<SymbolSlab> Symbols;
+  llvm::Optional<RefSlab> Refs;
 };
-// Parse an index file. The input must be a RIFF container chunk.
+// Parse an index file. The input must be a RIFF or YAML file.
 llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
 
 // Specifies the contents of an index file to be written.
 struct IndexFileOut {
-  const SymbolSlab *Symbols;
-  // TODO: Support serializing symbol occurrences.
+  const SymbolSlab *Symbols = nullptr;
+  const RefSlab *Refs = nullptr;
   // TODO: Support serializing Dex posting lists.
   IndexFileFormat Format = IndexFileFormat::RIFF;
 
   IndexFileOut() = default;
   IndexFileOut(const IndexFileIn &I)
-      : Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr) {}
+      : Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr),
+        Refs(I.Refs ? I.Refs.getPointer() : nullptr) {}
 };
 // Serializes an index file.
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
 
 // Convert a single symbol to YAML, a nice debug representation.
 std::string toYAML(const Symbol &);
+std::string toYAML(const std::pair<SymbolID, ArrayRef<Ref>> &);
 
 // Build an in-memory static index from an index file.
 // The size should be relatively small, so data can be managed in memory.

Modified: clang-tools-extra/trunk/clangd/index/YAMLSerialization.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/YAMLSerialization.cpp?rev=343778&r1=343777&r2=343778&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/YAMLSerialization.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/YAMLSerialization.cpp Thu Oct  4 07:09:55 2018
@@ -6,6 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+//
+// A YAML index file is a sequence of tagged entries.
+// Each entry either encodes a Symbol or the list of references to a symbol
+// (a "ref bundle").
+//
+//===----------------------------------------------------------------------===//
 
 #include "Index.h"
 #include "Serialization.h"
@@ -20,10 +26,22 @@
 #include <cstdint>
 
 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol::IncludeHeaderWithReferences)
+LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Ref)
 
+namespace {
+using RefBundle =
+    std::pair<clang::clangd::SymbolID, std::vector<clang::clangd::Ref>>;
+// This is a pale imitation of std::variant<Symbol, RefBundle>
+struct VariantEntry {
+  llvm::Optional<clang::clangd::Symbol> Symbol;
+  llvm::Optional<RefBundle> Refs;
+};
+} // namespace
 namespace llvm {
 namespace yaml {
 
+using clang::clangd::Ref;
+using clang::clangd::RefKind;
 using clang::clangd::Symbol;
 using clang::clangd::SymbolID;
 using clang::clangd::SymbolLocation;
@@ -179,6 +197,46 @@ template <> struct ScalarEnumerationTrai
   }
 };
 
+template <> struct MappingTraits<RefBundle> {
+  static void mapping(IO &IO, RefBundle &Refs) {
+    MappingNormalization<NormalizedSymbolID, SymbolID> NSymbolID(IO,
+                                                                 Refs.first);
+    IO.mapRequired("ID", NSymbolID->HexString);
+    IO.mapRequired("References", Refs.second);
+  }
+};
+
+struct NormalizedRefKind {
+  NormalizedRefKind(IO &) {}
+  NormalizedRefKind(IO &, RefKind O) { Kind = static_cast<uint8_t>(O); }
+
+  RefKind denormalize(IO &) { return static_cast<RefKind>(Kind); }
+
+  uint8_t Kind = 0;
+};
+
+template <> struct MappingTraits<Ref> {
+  static void mapping(IO &IO, Ref &R) {
+    MappingNormalization<NormalizedRefKind, RefKind> NKind(IO, R.Kind);
+    IO.mapRequired("Kind", NKind->Kind);
+    IO.mapRequired("Location", R.Location);
+  }
+};
+
+template <> struct MappingTraits<VariantEntry> {
+  static void mapping(IO &IO, VariantEntry &Variant) {
+    if (IO.mapTag("!Symbol", Variant.Symbol.hasValue())) {
+      if (!IO.outputting())
+        Variant.Symbol.emplace();
+      MappingTraits<Symbol>::mapping(IO, *Variant.Symbol);
+    } else if (IO.mapTag("!Refs", Variant.Refs.hasValue())) {
+      if (!IO.outputting())
+        Variant.Refs.emplace();
+      MappingTraits<RefBundle>::mapping(IO, *Variant.Refs);
+    }
+  }
+};
+
 } // namespace yaml
 } // namespace llvm
 
@@ -187,23 +245,38 @@ namespace clangd {
 
 void writeYAML(const IndexFileOut &O, raw_ostream &OS) {
   llvm::yaml::Output Yout(OS);
-  for (Symbol Sym : *O.Symbols) // copy: Yout<< requires mutability.
-    Yout << Sym;
+  for (const auto &Sym : *O.Symbols) {
+    VariantEntry Entry;
+    Entry.Symbol = Sym;
+    Yout << Entry;
+  }
+  if (O.Refs)
+    for (auto &Sym : *O.Refs) {
+      VariantEntry Entry;
+      Entry.Refs = Sym;
+      Yout << Entry;
+    }
 }
 
 Expected<IndexFileIn> readYAML(StringRef Data) {
   SymbolSlab::Builder Symbols;
+  RefSlab::Builder Refs;
   llvm::yaml::Input Yin(Data);
   do {
-    Symbol S;
-    Yin >> S;
+    VariantEntry Variant;
+    Yin >> Variant;
     if (Yin.error())
       return llvm::errorCodeToError(Yin.error());
-    Symbols.insert(S);
+    if (Variant.Symbol)
+      Symbols.insert(*Variant.Symbol);
+    if (Variant.Refs)
+      for (const auto &Ref : Variant.Refs->second)
+        Refs.insert(Variant.Refs->first, Ref);
   } while (Yin.nextDocument());
 
   IndexFileIn Result;
   Result.Symbols.emplace(std::move(Symbols).build());
+  Result.Refs.emplace(std::move(Refs).build());
   return std::move(Result);
 }
 
@@ -217,6 +290,17 @@ std::string toYAML(const Symbol &S) {
   }
   return Buf;
 }
+
+std::string toYAML(const std::pair<SymbolID, ArrayRef<Ref>> &Data) {
+  RefBundle Refs = {Data.first, Data.second};
+  std::string Buf;
+  {
+    llvm::raw_string_ostream OS(Buf);
+    llvm::yaml::Output Yout(OS);
+    Yout << Refs;
+  }
+  return Buf;
+}
 
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp?rev=343778&r1=343777&r2=343778&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp (original)
+++ clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp Thu Oct  4 07:09:55 2018
@@ -67,18 +67,30 @@ public:
                    else
                      Symbols.insert(Sym);
                  }
+               },
+               [&](RefSlab S) {
+                 std::lock_guard<std::mutex> Lock(SymbolsMu);
+                 for (const auto &Sym : S) {
+                   // No need to merge as currently all Refs are from main file.
+                   for (const auto &Ref : Sym.second)
+                     Refs.insert(Sym.first, Ref);
+                 }
                })
         .release();
   }
 
   // Awkward: we write the result in the destructor, because the executor
   // takes ownership so it's the easiest way to get our data back out.
-  ~IndexActionFactory() { Result.Symbols = std::move(Symbols).build(); }
+  ~IndexActionFactory() {
+    Result.Symbols = std::move(Symbols).build();
+    Result.Refs = std::move(Refs).build();
+  }
 
 private:
   IndexFileIn &Result;
   std::mutex SymbolsMu;
   SymbolSlab::Builder Symbols;
+  RefSlab::Builder Refs;
 };
 
 } // namespace

Modified: clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp?rev=343778&r1=343777&r2=343778&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp Thu Oct  4 07:09:55 2018
@@ -13,6 +13,9 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+using testing::_;
+using testing::AllOf;
+using testing::Pair;
 using testing::UnorderedElementsAre;
 using testing::UnorderedElementsAreArray;
 namespace clang {
@@ -21,6 +24,7 @@ namespace {
 
 const char *YAML = R"(
 ---
+!Symbol
 ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
 Name:   'Foo1'
 Scope:   'clang::'
@@ -46,6 +50,7 @@ IncludeHeaders:
     References:    3
 ...
 ---
+!Symbol
 ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858
 Name:   'Foo2'
 Scope:   'clang::'
@@ -64,6 +69,18 @@ Flags:    2
 Signature:    '-sig'
 CompletionSnippetSuffix:    '-snippet'
 ...
+!Refs
+ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
+References:
+  - Kind: 4
+    Location:
+      FileURI:    file:///path/foo.cc
+      Start:
+        Line: 5
+        Column: 3
+      End:
+        Line: 5
+        Column: 8
 )";
 
 MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
@@ -107,6 +124,16 @@ TEST(SerializationTest, YAMLConversions)
   EXPECT_EQ(Sym2.CanonicalDeclaration.FileURI, "file:///path/bar.h");
   EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
   EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
+
+  ASSERT_TRUE(bool(ParsedYAML->Refs));
+  EXPECT_THAT(*ParsedYAML->Refs,
+              UnorderedElementsAre(
+                  Pair(cantFail(SymbolID::fromStr(
+                           "057557CEBF6E6B2DD437FBF60CC58F352D1DF856")),
+                       testing::SizeIs(1))));
+  auto Ref1 = ParsedYAML->Refs->begin()->second.front();
+  EXPECT_EQ(Ref1.Kind, RefKind::Reference);
+  EXPECT_EQ(Ref1.Location.FileURI, "file:///path/foo.cc");
 }
 
 std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
@@ -115,24 +142,37 @@ std::vector<std::string> YAMLFromSymbols
     Result.push_back(toYAML(Sym));
   return Result;
 }
+std::vector<std::string> YAMLFromRefs(const RefSlab &Slab) {
+  std::vector<std::string> Result;
+  for (const auto &Sym : Slab)
+    Result.push_back(toYAML(Sym));
+  return Result;
+}
 
 TEST(SerializationTest, BinaryConversions) {
   auto In = readIndexFile(YAML);
   EXPECT_TRUE(bool(In)) << In.takeError();
 
   // Write to binary format, and parse again.
-  IndexFileOut Out;
-  Out.Symbols = In->Symbols.getPointer();
+  IndexFileOut Out(*In);
   Out.Format = IndexFileFormat::RIFF;
   std::string Serialized = llvm::to_string(Out);
+  {
+    std::error_code EC;
+    llvm::raw_fd_ostream F("/tmp/foo", EC);
+    F << Serialized;
+  }
 
   auto In2 = readIndexFile(Serialized);
   ASSERT_TRUE(bool(In2)) << In.takeError();
-  ASSERT_TRUE(In->Symbols);
+  ASSERT_TRUE(In2->Symbols);
+  ASSERT_TRUE(In2->Refs);
 
   // Assert the YAML serializations match, for nice comparisons and diffs.
   EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
               UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
+  EXPECT_THAT(YAMLFromRefs(*In2->Refs),
+              UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
 }
 
 } // namespace




More information about the cfe-commits mailing list