[clang-tools-extra] r346221 - [clangd] auto-index stores symbols per-file instead of per-TU.

Eric Liu via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 6 02:55:22 PST 2018


Author: ioeric
Date: Tue Nov  6 02:55:21 2018
New Revision: 346221

URL: http://llvm.org/viewvc/llvm-project?rev=346221&view=rev
Log:
[clangd] auto-index stores symbols per-file instead of per-TU.

Summary:
This allows us to deduplicate header symbols across TUs. File digests
are collects when collecting symbols/refs. And the index store deduplicates
file symbols based on the file digest.

Reviewers: sammccall, hokein

Reviewed By: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D53433

Modified:
    clang-tools-extra/trunk/clangd/index/Background.cpp
    clang-tools-extra/trunk/clangd/index/Background.h
    clang-tools-extra/trunk/clangd/index/FileIndex.cpp
    clang-tools-extra/trunk/clangd/index/FileIndex.h
    clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp
    clang-tools-extra/trunk/clangd/index/SymbolCollector.h
    clang-tools-extra/trunk/clangd/index/dex/Dex.h
    clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp
    clang-tools-extra/trunk/unittests/clangd/DexTests.cpp
    clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp
    clang-tools-extra/trunk/unittests/clangd/SyncAPI.cpp
    clang-tools-extra/trunk/unittests/clangd/SyncAPI.h

Modified: clang-tools-extra/trunk/clangd/index/Background.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.cpp?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Background.cpp Tue Nov  6 02:55:21 2018
@@ -13,11 +13,19 @@
 #include "Logger.h"
 #include "Threading.h"
 #include "Trace.h"
+#include "URI.h"
 #include "index/IndexAction.h"
 #include "index/MemIndex.h"
 #include "index/Serialization.h"
+#include "index/SymbolCollector.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/SHA1.h"
 #include <random>
+#include <string>
 
 using namespace llvm;
 namespace clang {
@@ -125,6 +133,142 @@ void BackgroundIndex::enqueueLocked(tool
       std::move(Cmd)));
 }
 
+static BackgroundIndex::FileDigest digest(StringRef Content) {
+  return SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
+}
+
+static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
+                                                        FileID FID) {
+  bool Invalid = false;
+  StringRef Content = SM.getBufferData(FID, &Invalid);
+  if (Invalid)
+    return None;
+  return digest(Content);
+}
+
+// Resolves URI to file paths with cache.
+class URIToFileCache {
+public:
+  URIToFileCache(llvm::StringRef HintPath) : HintPath(HintPath) {}
+
+  llvm::StringRef resolve(llvm::StringRef FileURI) {
+    auto I = URIToPathCache.try_emplace(FileURI);
+    if (I.second) {
+      auto U = URI::parse(FileURI);
+      if (!U) {
+        elog("Failed to parse URI {0}: {1}", FileURI, U.takeError());
+        assert(false && "Failed to parse URI");
+        return "";
+      }
+      auto Path = URI::resolve(*U, HintPath);
+      if (!Path) {
+        elog("Failed to resolve URI {0}: {1}", FileURI, Path.takeError());
+        assert(false && "Failed to resolve URI");
+        return "";
+      }
+      I.first->second = *Path;
+    }
+    return I.first->second;
+  }
+
+private:
+  std::string HintPath;
+  llvm::StringMap<std::string> URIToPathCache;
+};
+
+/// Given index results from a TU, only update files in \p FilesToUpdate.
+void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
+                             RefSlab Refs,
+                             const StringMap<FileDigest> &FilesToUpdate) {
+  // Partition symbols/references into files.
+  struct File {
+    DenseSet<const Symbol *> Symbols;
+    DenseSet<const Ref *> Refs;
+  };
+  StringMap<File> Files;
+  URIToFileCache URICache(MainFile);
+  for (const auto &Sym : Symbols) {
+    if (Sym.CanonicalDeclaration) {
+      auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI);
+      if (FilesToUpdate.count(DeclPath) != 0)
+        Files[DeclPath].Symbols.insert(&Sym);
+    }
+    // For symbols with different declaration and definition locations, we store
+    // the full symbol in both the header file and the implementation file, so
+    // that merging can tell the preferred symbols (from canonical headers) from
+    // other symbols (e.g. forward declarations).
+    if (Sym.Definition &&
+        Sym.Definition.FileURI != Sym.CanonicalDeclaration.FileURI) {
+      auto DefPath = URICache.resolve(Sym.Definition.FileURI);
+      if (FilesToUpdate.count(DefPath) != 0)
+        Files[DefPath].Symbols.insert(&Sym);
+    }
+  }
+  DenseMap<const Ref *, SymbolID> RefToIDs;
+  for (const auto &SymRefs : Refs) {
+    for (const auto &R : SymRefs.second) {
+      auto Path = URICache.resolve(R.Location.FileURI);
+      if (FilesToUpdate.count(Path) != 0) {
+        auto &F = Files[Path];
+        RefToIDs[&R] = SymRefs.first;
+        F.Refs.insert(&R);
+      }
+    }
+  }
+
+  // Build and store new slabs for each updated file.
+  for (const auto &F : Files) {
+    StringRef Path = F.first();
+    vlog("Update symbols in {0}", Path);
+    SymbolSlab::Builder Syms;
+    RefSlab::Builder Refs;
+    for (const auto *S : F.second.Symbols)
+      Syms.insert(*S);
+    for (const auto *R : F.second.Refs)
+      Refs.insert(RefToIDs[R], *R);
+
+    std::lock_guard<std::mutex> Lock(DigestsMu);
+    // This can override a newer version that is added in another thread,
+    // if this thread sees the older version but finishes later. This should be
+    // rare in practice.
+    IndexedFileDigests[Path] = FilesToUpdate.lookup(Path);
+    IndexedSymbols.update(Path,
+                          make_unique<SymbolSlab>(std::move(Syms).build()),
+                          make_unique<RefSlab>(std::move(Refs).build()));
+  }
+}
+
+// Creates a filter to not collect index results from files with unchanged
+// digests.
+// \p FileDigests contains file digests for the current indexed files, and all changed files will be added to \p FilesToUpdate.
+decltype(SymbolCollector::Options::FileFilter) createFileFilter(
+    const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests,
+    llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) {
+  return [&FileDigests, &FilesToUpdate](const SourceManager &SM, FileID FID) {
+    StringRef Path;
+    if (const auto *F = SM.getFileEntryForID(FID))
+      Path = F->getName();
+    if (Path.empty())
+      return false; // Skip invalid files.
+    SmallString<128> AbsPath(Path);
+    if (std::error_code EC =
+            SM.getFileManager().getVirtualFileSystem()->makeAbsolute(AbsPath)) {
+      elog("Warning: could not make absolute file: {0}", EC.message());
+      return false; // Skip files without absolute path.
+    }
+    sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
+    auto Digest = digestFile(SM, FID);
+    if (!Digest)
+      return false;
+    auto D = FileDigests.find(AbsPath);
+    if (D != FileDigests.end() && D->second == Digest)
+      return false; // Skip files that haven't changed.
+
+    FilesToUpdate[AbsPath] = *Digest;
+    return true;
+  };
+}
+
 Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
   trace::Span Tracer("BackgroundIndex");
   SPAN_ATTACH(Tracer, "file", Cmd.Filename);
@@ -140,12 +284,18 @@ Error BackgroundIndex::index(tooling::Co
   auto Buf = FS->getBufferForFile(AbsolutePath);
   if (!Buf)
     return errorCodeToError(Buf.getError());
-  StringRef Contents = Buf->get()->getBuffer();
-  auto Hash = SHA1::hash({(const uint8_t *)Contents.data(), Contents.size()});
+  auto Hash = digest(Buf->get()->getBuffer());
+
+  // Take a snapshot of the digests to avoid locking for each file in the TU.
+  llvm::StringMap<FileDigest> DigestsSnapshot;
+  {
+    std::lock_guard<std::mutex> Lock(DigestsMu);
+    if (IndexedFileDigests.lookup(AbsolutePath) == Hash) {
+      vlog("No need to index {0}, already up to date", AbsolutePath);
+      return Error::success();
+    }
 
-  if (FileHash.lookup(AbsolutePath) == Hash) {
-    vlog("No need to index {0}, already up to date", AbsolutePath);
-    return Error::success();
+    DigestsSnapshot = IndexedFileDigests;
   }
 
   log("Indexing {0}", Cmd.Filename, toHex(Hash));
@@ -166,9 +316,11 @@ Error BackgroundIndex::index(tooling::Co
                              "Couldn't build compiler instance");
 
   SymbolCollector::Options IndexOpts;
+  IndexOpts.URISchemes = URISchemes;
+  StringMap<FileDigest> FilesToUpdate;
+  IndexOpts.FileFilter = createFileFilter(DigestsSnapshot, FilesToUpdate);
   SymbolSlab Symbols;
   RefSlab Refs;
-  IndexFileIn IndexData;
   auto Action = createStaticIndexingAction(
       IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); },
       [&](RefSlab R) { Refs = std::move(R); });
@@ -190,16 +342,20 @@ Error BackgroundIndex::index(tooling::Co
       Symbols.size(), Refs.numRefs());
   SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
   SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
-  // FIXME: partition the symbols by file rather than TU, to avoid duplication.
-  IndexedSymbols.update(AbsolutePath,
-                        llvm::make_unique<SymbolSlab>(std::move(Symbols)),
-                        llvm::make_unique<RefSlab>(std::move(Refs)));
-  FileHash[AbsolutePath] = Hash;
+  update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate);
+  {
+    // Make sure hash for the main file is always updated even if there is no
+    // index data in it.
+    std::lock_guard<std::mutex> Lock(DigestsMu);
+    IndexedFileDigests[AbsolutePath] = Hash;
+  }
 
   // FIXME: this should rebuild once-in-a-while, not after every file.
   //       At that point we should use Dex, too.
   vlog("Rebuilding automatic index");
-  reset(IndexedSymbols.buildIndex(IndexType::Light, URISchemes));
+  reset(IndexedSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge,
+                                  URISchemes));
+
   return Error::success();
 }
 

Modified: clang-tools-extra/trunk/clangd/index/Background.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.h?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.h (original)
+++ clang-tools-extra/trunk/clangd/index/Background.h Tue Nov  6 02:55:21 2018
@@ -15,6 +15,7 @@
 #include "index/FileIndex.h"
 #include "index/Index.h"
 #include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/SHA1.h"
 #include "llvm/Support/Threading.h"
 #include <condition_variable>
@@ -34,8 +35,7 @@ class BackgroundIndex : public SwapIndex
 public:
   // FIXME: resource-dir injection should be hoisted somewhere common.
   BackgroundIndex(Context BackgroundContext, StringRef ResourceDir,
-                  const FileSystemProvider &,
-                  ArrayRef<std::string> URISchemes = {},
+                  const FileSystemProvider &, ArrayRef<std::string> URISchemes,
                   size_t ThreadPoolSize = llvm::hardware_concurrency());
   ~BackgroundIndex(); // Blocks while the current task finishes.
 
@@ -54,7 +54,13 @@ public:
   // Wait until the queue is empty, to allow deterministic testing.
   void blockUntilIdleForTest();
 
+  using FileDigest = decltype(llvm::SHA1::hash({}));
+
 private:
+  /// Given index results from a TU, only update files in \p FilesToUpdate.
+  void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs,
+              const llvm::StringMap<FileDigest> &FilesToUpdate);
+
   // configuration
   std::string ResourceDir;
   const FileSystemProvider &FSProvider;
@@ -63,9 +69,10 @@ private:
 
   // index state
   llvm::Error index(tooling::CompileCommand);
-  FileSymbols IndexedSymbols; // Index contents.
-  using Hash = decltype(llvm::SHA1::hash({}));
-  llvm::StringMap<Hash> FileHash; // Digest of indexed file.
+
+  FileSymbols IndexedSymbols;
+  llvm::StringMap<FileDigest> IndexedFileDigests; // Key is absolute file path.
+  std::mutex DigestsMu;
 
   // queue management
   using Task = std::function<void()>;

Modified: clang-tools-extra/trunk/clangd/index/FileIndex.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/FileIndex.cpp?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/FileIndex.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/FileIndex.cpp Tue Nov  6 02:55:21 2018
@@ -12,11 +12,16 @@
 #include "Logger.h"
 #include "SymbolCollector.h"
 #include "index/Index.h"
+#include "index/MemIndex.h"
 #include "index/Merge.h"
 #include "index/dex/Dex.h"
 #include "clang/Index/IndexingAction.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include <memory>
 
 using namespace llvm;
@@ -101,7 +106,8 @@ void FileSymbols::update(PathRef Path, s
 }
 
 std::unique_ptr<SymbolIndex>
-FileSymbols::buildIndex(IndexType Type, ArrayRef<std::string> URISchemes) {
+FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle,
+                        ArrayRef<std::string> URISchemes) {
   std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
   std::vector<std::shared_ptr<RefSlab>> RefSlabs;
   {
@@ -112,9 +118,34 @@ FileSymbols::buildIndex(IndexType Type,
       RefSlabs.push_back(FileAndRefs.second);
   }
   std::vector<const Symbol *> AllSymbols;
-  for (const auto &Slab : SymbolSlabs)
-    for (const auto &Sym : *Slab)
-      AllSymbols.push_back(&Sym);
+  std::vector<Symbol> SymsStorage;
+  switch (DuplicateHandle) {
+  case DuplicateHandling::Merge: {
+    DenseMap<SymbolID, Symbol> Merged;
+    for (const auto &Slab : SymbolSlabs) {
+      for (const auto &Sym : *Slab) {
+        auto I = Merged.try_emplace(Sym.ID, Sym);
+        if (!I.second)
+          I.first->second = mergeSymbol(std::move(I.first->second), Sym);
+      }
+    }
+    SymsStorage.reserve(Merged.size());
+    for (auto &Sym : Merged) {
+      SymsStorage.push_back(std::move(Sym.second));
+      AllSymbols.push_back(&SymsStorage.back());
+    }
+    // FIXME: aggregate symbol reference count based on references.
+    break;
+  }
+  case DuplicateHandling::PickOne: {
+    llvm::DenseSet<SymbolID> AddedSymbols;
+    for (const auto &Slab : SymbolSlabs)
+      for (const auto &Sym : *Slab)
+        if (AddedSymbols.insert(Sym.ID).second)
+          AllSymbols.push_back(&Sym);
+    break;
+  }
+  }
 
   std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID.
   DenseMap<SymbolID, ArrayRef<Ref>> AllRefs;
@@ -140,7 +171,8 @@ FileSymbols::buildIndex(IndexType Type,
     }
   }
 
-  size_t StorageSize = RefsStorage.size() * sizeof(Ref);
+  size_t StorageSize =
+      RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol);
   for (const auto &Slab : SymbolSlabs)
     StorageSize += Slab->bytes();
   for (const auto &RefSlab : RefSlabs)
@@ -152,13 +184,13 @@ FileSymbols::buildIndex(IndexType Type,
     return llvm::make_unique<MemIndex>(
         make_pointee_range(AllSymbols), std::move(AllRefs),
         std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
-                        std::move(RefsStorage)),
+                        std::move(RefsStorage), std::move(SymsStorage)),
         StorageSize);
   case IndexType::Heavy:
     return llvm::make_unique<dex::Dex>(
         make_pointee_range(AllSymbols), std::move(AllRefs),
         std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
-                        std::move(RefsStorage)),
+                        std::move(RefsStorage), std::move(SymsStorage)),
         StorageSize, std::move(URISchemes));
   }
   llvm_unreachable("Unknown clangd::IndexType");
@@ -176,8 +208,9 @@ void FileIndex::updatePreamble(PathRef P
   PreambleSymbols.update(Path,
                          llvm::make_unique<SymbolSlab>(std::move(Symbols)),
                          llvm::make_unique<RefSlab>());
-  PreambleIndex.reset(PreambleSymbols.buildIndex(
-      UseDex ? IndexType::Heavy : IndexType::Light, URISchemes));
+  PreambleIndex.reset(
+      PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light,
+                                 DuplicateHandling::PickOne, URISchemes));
 }
 
 void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
@@ -185,7 +218,8 @@ void FileIndex::updateMain(PathRef Path,
   MainFileSymbols.update(
       Path, llvm::make_unique<SymbolSlab>(std::move(Contents.first)),
       llvm::make_unique<RefSlab>(std::move(Contents.second)));
-  MainFileIndex.reset(MainFileSymbols.buildIndex(IndexType::Light, URISchemes));
+  MainFileIndex.reset(MainFileSymbols.buildIndex(
+      IndexType::Light, DuplicateHandling::PickOne, URISchemes));
 }
 
 } // namespace clangd

Modified: clang-tools-extra/trunk/clangd/index/FileIndex.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/FileIndex.h?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/FileIndex.h (original)
+++ clang-tools-extra/trunk/clangd/index/FileIndex.h Tue Nov  6 02:55:21 2018
@@ -34,6 +34,14 @@ enum class IndexType {
   Heavy,
 };
 
+/// How to handle duplicated symbols across multiple files.
+enum class DuplicateHandling {
+  // Pick a random symbol. Less accurate but faster.
+  PickOne,
+  // Merge symbols. More accurate but slower.
+  Merge,
+};
+
 /// A container of Symbols from several source files. It can be updated
 /// at source-file granularity, replacing all symbols from one file with a new
 /// set.
@@ -56,7 +64,9 @@ public:
 
   // The index keeps the symbols alive.
   std::unique_ptr<SymbolIndex>
-  buildIndex(IndexType, ArrayRef<std::string> URISchemes = {});
+  buildIndex(IndexType,
+             DuplicateHandling DuplicateHandle = DuplicateHandling::PickOne,
+             ArrayRef<std::string> URISchemes = {});
 
 private:
   mutable std::mutex Mutex;

Modified: clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp Tue Nov  6 02:55:21 2018
@@ -203,6 +203,17 @@ getTokenRange(SourceLocation TokLoc, con
           CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
 }
 
+bool shouldIndexFile(const SourceManager &SM, FileID FID,
+                     const SymbolCollector::Options &Opts,
+                     llvm::DenseMap<FileID, bool> *FilesToIndexCache) {
+  if (!Opts.FileFilter)
+    return true;
+  auto I = FilesToIndexCache->try_emplace(FID);
+  if (I.second)
+    I.first->second = Opts.FileFilter(SM, FID);
+  return I.first->second;
+}
+
 // Return the symbol location of the token at \p TokLoc.
 Optional<SymbolLocation> getTokenLocation(SourceLocation TokLoc,
                                           const SourceManager &SM,
@@ -392,7 +403,8 @@ bool SymbolCollector::handleMacroOccuren
   assert(PP.get());
 
   const auto &SM = PP->getSourceManager();
-  if (SM.isInMainFile(SM.getExpansionLoc(MI->getDefinitionLoc())))
+  auto DefLoc = MI->getDefinitionLoc();
+  if (SM.isInMainFile(SM.getExpansionLoc(DefLoc)))
     return true;
   // Header guards are not interesting in index. Builtin macros don't have
   // useful locations and are not needed for code completions.
@@ -426,8 +438,10 @@ bool SymbolCollector::handleMacroOccuren
   S.Flags |= Symbol::IndexedForCodeCompletion;
   S.SymInfo = index::getSymbolInfoForMacro(*MI);
   std::string FileURI;
-  if (auto DeclLoc = getTokenLocation(MI->getDefinitionLoc(), SM, Opts,
-                                      PP->getLangOpts(), FileURI))
+  // FIXME: use the result to filter out symbols.
+  shouldIndexFile(SM, SM.getFileID(Loc), Opts, &FilesToIndexCache);
+  if (auto DeclLoc =
+          getTokenLocation(DefLoc, SM, Opts, PP->getLangOpts(), FileURI))
     S.CanonicalDeclaration = *DeclLoc;
 
   CodeCompletionResult SymbolCompletion(Name);
@@ -439,9 +453,8 @@ bool SymbolCollector::handleMacroOccuren
 
   std::string Include;
   if (Opts.CollectIncludePath && shouldCollectIncludePath(S.SymInfo.Kind)) {
-    if (auto Header =
-            getIncludeHeader(Name->getName(), SM,
-                             SM.getExpansionLoc(MI->getDefinitionLoc()), Opts))
+    if (auto Header = getIncludeHeader(Name->getName(), SM,
+                                       SM.getExpansionLoc(DefLoc), Opts))
       Include = std::move(*Header);
   }
   S.Signature = Signature;
@@ -503,6 +516,8 @@ void SymbolCollector::finish() {
       if (auto ID = getSymbolID(It.first)) {
         for (const auto &LocAndRole : It.second) {
           auto FileID = SM.getFileID(LocAndRole.first);
+          // FIXME: use the result to filter out references.
+          shouldIndexFile(SM, FileID, Opts, &FilesToIndexCache);
           if (auto FileURI = GetURI(FileID)) {
             auto Range =
                 getTokenRange(LocAndRole.first, SM, ASTCtx->getLangOpts());
@@ -521,6 +536,7 @@ void SymbolCollector::finish() {
   ReferencedDecls.clear();
   ReferencedMacros.clear();
   DeclRefs.clear();
+  FilesToIndexCache.clear();
 }
 
 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND,
@@ -541,8 +557,11 @@ const Symbol *SymbolCollector::addDeclar
     S.Flags |= Symbol::ImplementationDetail;
   S.SymInfo = index::getSymbolInfo(&ND);
   std::string FileURI;
-  if (auto DeclLoc = getTokenLocation(findNameLoc(&ND), SM, Opts,
-                                      ASTCtx->getLangOpts(), FileURI))
+  auto Loc = findNameLoc(&ND);
+  // FIXME: use the result to filter out symbols.
+  shouldIndexFile(SM, SM.getFileID(Loc), Opts, &FilesToIndexCache);
+  if (auto DeclLoc =
+          getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
     S.CanonicalDeclaration = *DeclLoc;
 
   // Add completion info.
@@ -593,9 +612,12 @@ void SymbolCollector::addDefinition(cons
   // in clang::index. We should only see one definition.
   Symbol S = DeclSym;
   std::string FileURI;
-  if (auto DefLoc = getTokenLocation(findNameLoc(&ND),
-                                     ND.getASTContext().getSourceManager(),
-                                     Opts, ASTCtx->getLangOpts(), FileURI))
+  auto Loc = findNameLoc(&ND);
+  const auto &SM = ND.getASTContext().getSourceManager();
+  // FIXME: use the result to filter out symbols.
+  shouldIndexFile(SM, SM.getFileID(Loc), Opts, &FilesToIndexCache);
+  if (auto DefLoc =
+          getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
     S.Definition = *DefLoc;
   Symbols.insert(S);
 }

Modified: clang-tools-extra/trunk/clangd/index/SymbolCollector.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolCollector.h?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolCollector.h (original)
+++ clang-tools-extra/trunk/clangd/index/SymbolCollector.h Tue Nov  6 02:55:21 2018
@@ -13,9 +13,13 @@
 #include "Index.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Index/IndexDataConsumer.h"
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
+#include "llvm/ADT/DenseMap.h"
+#include <functional>
 
 namespace clang {
 namespace clangd {
@@ -69,6 +73,9 @@ public:
     /// collect macros. For example, `indexTopLevelDecls` will not index any
     /// macro even if this is true.
     bool CollectMacro = false;
+    /// If this is set, only collect symbols/references from a file if
+    /// `FileFilter(SM, FID)` is true. If not set, all files are indexed.
+    std::function<bool(const SourceManager &, FileID)> FileFilter = nullptr;
   };
 
   SymbolCollector(Options Opts);
@@ -125,6 +132,8 @@ private:
   // canonical by clang but should not be considered canonical in the index
   // unless it's a definition.
   llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls;
+  // Cache whether to index a file or not.
+  llvm::DenseMap<FileID, bool> FilesToIndexCache;
 };
 
 } // namespace clangd

Modified: clang-tools-extra/trunk/clangd/index/dex/Dex.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Dex.h?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Dex.h (original)
+++ clang-tools-extra/trunk/clangd/index/dex/Dex.h Tue Nov  6 02:55:21 2018
@@ -52,10 +52,8 @@ public:
       SymbolCollector::Options Opts;
       URISchemes = Opts.URISchemes;
     }
-    llvm::DenseSet<SymbolID> SeenIDs;
     for (auto &&Sym : Symbols)
-      if (SeenIDs.insert(Sym.ID).second)
-        this->Symbols.push_back(&Sym);
+      this->Symbols.push_back(&Sym);
     for (auto &&Ref : Refs)
       this->Refs.try_emplace(Ref.first, Ref.second);
     buildIndex();

Modified: clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp Tue Nov  6 02:55:21 2018
@@ -4,33 +4,76 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+using testing::_;
+using testing::AllOf;
+using testing::Not;
 using testing::UnorderedElementsAre;
 
 namespace clang {
 namespace clangd {
 
 MATCHER_P(Named, N, "") { return arg.Name == N; }
+MATCHER(Declared, "") { return !arg.CanonicalDeclaration.FileURI.empty(); }
+MATCHER(Defined, "") { return !arg.Definition.FileURI.empty(); }
+
+MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; }
+testing::Matcher<const RefSlab &>
+RefsAre(std::vector<testing::Matcher<Ref>> Matchers) {
+  return ElementsAre(testing::Pair(_, UnorderedElementsAreArray(Matchers)));
+}
 
 TEST(BackgroundIndexTest, IndexTwoFiles) {
   MockFSProvider FS;
   // a.h yields different symbols when included by A.cc vs B.cc.
-  // Currently we store symbols for each TU, so we get both.
-  FS.Files[testPath("root/A.h")] = "void a_h(); void NAME(){}";
-  FS.Files[testPath("root/A.cc")] = "#include \"A.h\"";
-  FS.Files[testPath("root/B.cc")] = "#define NAME bar\n#include \"A.h\"";
-  BackgroundIndex Idx(Context::empty(), "", FS);
+  FS.Files[testPath("root/A.h")] = R"cpp(
+      void common();
+      void f_b();
+      #if A
+        class A_CC {};
+      #else
+        class B_CC{};
+      #endif
+      )cpp";
+  FS.Files[testPath("root/A.cc")] =
+      "#include \"A.h\"\nvoid g() { (void)common; }";
+  FS.Files[testPath("root/B.cc")] =
+      R"cpp(
+      #define A 0
+      #include "A.h"
+      void f_b() {
+        (void)common;
+      })cpp";
+  BackgroundIndex Idx(Context::empty(), "", FS, /*URISchmes=*/{"unittest"});
 
   tooling::CompileCommand Cmd;
   Cmd.Filename = testPath("root/A.cc");
   Cmd.Directory = testPath("root");
-  Cmd.CommandLine = {"clang++", "-DNAME=foo", testPath("root/A.cc")};
+  Cmd.CommandLine = {"clang++", "-DA=1", testPath("root/A.cc")};
   Idx.enqueue(testPath("root"), Cmd);
-  Cmd.CommandLine.back() = Cmd.Filename = testPath("root/B.cc");
+
+  Idx.blockUntilIdleForTest();
+  EXPECT_THAT(
+      runFuzzyFind(Idx, ""),
+      UnorderedElementsAre(Named("common"), Named("A_CC"),
+                           AllOf(Named("f_b"), Declared(), Not(Defined()))));
+
+  Cmd.Filename = testPath("root/B.cc");
+  Cmd.CommandLine = {"clang++", Cmd.Filename};
   Idx.enqueue(testPath("root"), Cmd);
 
   Idx.blockUntilIdleForTest();
+  // B_CC is dropped as we don't collect symbols from A.h in this compilation.
   EXPECT_THAT(runFuzzyFind(Idx, ""),
-              UnorderedElementsAre(Named("a_h"), Named("foo"), Named("bar")));
+              UnorderedElementsAre(Named("common"), Named("A_CC"),
+                                   AllOf(Named("f_b"), Declared(), Defined())));
+
+  auto Syms = runFuzzyFind(Idx, "common");
+  EXPECT_THAT(Syms, UnorderedElementsAre(Named("common")));
+  auto Common = *Syms.begin();
+  EXPECT_THAT(getRefs(Idx, Common.ID),
+              RefsAre({FileURI("unittest:///root/A.h"),
+                       FileURI("unittest:///root/A.cc"),
+                       FileURI("unittest:///root/B.cc")}));
 }
 
 } // namespace clangd

Modified: clang-tools-extra/trunk/unittests/clangd/DexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/DexTests.cpp?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/DexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/DexTests.cpp Tue Nov  6 02:55:21 2018
@@ -491,15 +491,6 @@ TEST(Dex, FuzzyFind) {
                                    "other::A"));
 }
 
-TEST(DexTest, DexDeduplicate) {
-  std::vector<Symbol> Symbols = {symbol("1"), symbol("2"), symbol("3"),
-                                 symbol("2") /* duplicate */};
-  FuzzyFindRequest Req;
-  Req.Query = "2";
-  Dex I(Symbols, RefSlab(), URISchemes);
-  EXPECT_THAT(match(I, Req), ElementsAre("2"));
-}
-
 TEST(DexTest, DexLimitedNumMatches) {
   auto I = Dex::build(generateNumSymbols(0, 100), RefSlab(), URISchemes);
   FuzzyFindRequest Req;

Modified: clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp Tue Nov  6 02:55:21 2018
@@ -14,6 +14,7 @@
 #include "TestFS.h"
 #include "TestTU.h"
 #include "index/FileIndex.h"
+#include "index/Index.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/PCHContainerOperations.h"
 #include "clang/Frontend/Utils.h"
@@ -39,6 +40,7 @@ MATCHER_P(RefRange, Range, "") {
 }
 MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; }
 MATCHER_P(DeclURI, U, "") { return arg.CanonicalDeclaration.FileURI == U; }
+MATCHER_P(DefURI, U, "") { return arg.Definition.FileURI == U; }
 MATCHER_P(QName, N, "") { return (arg.Scope + arg.Name).str() == N; }
 
 using namespace llvm;
@@ -73,14 +75,6 @@ std::unique_ptr<RefSlab> refSlab(const S
   return llvm::make_unique<RefSlab>(std::move(Slab).build());
 }
 
-RefSlab getRefs(const SymbolIndex &I, SymbolID ID) {
-  RefsRequest Req;
-  Req.IDs = {ID};
-  RefSlab::Builder Slab;
-  I.refs(Req, [&](const Ref &S) { Slab.insert(ID, S); });
-  return std::move(Slab).build();
-}
-
 TEST(FileSymbolsTest, UpdateAndGet) {
   FileSymbols FS;
   EXPECT_THAT(runFuzzyFind(*FS.buildIndex(IndexType::Light), ""), IsEmpty());
@@ -102,6 +96,27 @@ TEST(FileSymbolsTest, Overlap) {
                                      QName("4"), QName("5")));
 }
 
+TEST(FileSymbolsTest, MergeOverlap) {
+  FileSymbols FS;
+  auto OneSymboSlab = [](Symbol Sym) {
+    SymbolSlab::Builder S;
+    S.insert(Sym);
+    return make_unique<SymbolSlab>(std::move(S).build());
+  };
+  auto X1 = symbol("x");
+  X1.CanonicalDeclaration.FileURI = "file:///x1";
+  auto X2 = symbol("x");
+  X2.Definition.FileURI = "file:///x2";
+
+  FS.update("f1", OneSymboSlab(X1), nullptr);
+  FS.update("f2", OneSymboSlab(X2), nullptr);
+  for (auto Type : {IndexType::Light, IndexType::Heavy})
+    EXPECT_THAT(
+        runFuzzyFind(*FS.buildIndex(Type, DuplicateHandling::Merge), "x"),
+        UnorderedElementsAre(
+            AllOf(QName("x"), DeclURI("file:///x1"), DefURI("file:///x2"))));
+}
+
 TEST(FileSymbolsTest, SnapshotAliveAfterRemove) {
   FileSymbols FS;
 

Modified: clang-tools-extra/trunk/unittests/clangd/SyncAPI.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SyncAPI.cpp?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SyncAPI.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/SyncAPI.cpp Tue Nov  6 02:55:21 2018
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SyncAPI.h"
+#include "index/Index.h"
 
 using namespace llvm;
 namespace clang {
@@ -138,5 +139,14 @@ SymbolSlab runFuzzyFind(const SymbolInde
   return std::move(Builder).build();
 }
 
+RefSlab getRefs(const SymbolIndex &Index, SymbolID ID) {
+  RefsRequest Req;
+  Req.IDs = {ID};
+  RefSlab::Builder Slab;
+  Index.refs(Req, [&](const Ref &S) { Slab.insert(ID, S); });
+  return std::move(Slab).build();
+}
+
+
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/unittests/clangd/SyncAPI.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SyncAPI.h?rev=346221&r1=346220&r2=346221&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SyncAPI.h (original)
+++ clang-tools-extra/trunk/unittests/clangd/SyncAPI.h Tue Nov  6 02:55:21 2018
@@ -52,6 +52,7 @@ runDocumentSymbols(ClangdServer &Server,
 
 SymbolSlab runFuzzyFind(const SymbolIndex &Index, StringRef Query);
 SymbolSlab runFuzzyFind(const SymbolIndex &Index, const FuzzyFindRequest &Req);
+RefSlab getRefs(const SymbolIndex &Index, SymbolID ID);
 
 } // namespace clangd
 } // namespace clang




More information about the cfe-commits mailing list