[clang-tools-extra] dffa9df - [clangd] Shard preamble symbols in dynamic index

Kadir Cetinkaya via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 15 00:15:50 PDT 2020


Author: Kadir Cetinkaya
Date: 2020-04-15T09:10:10+02:00
New Revision: dffa9dfbda56820c02e357ad34c24ce8759b4d26

URL: https://github.com/llvm/llvm-project/commit/dffa9dfbda56820c02e357ad34c24ce8759b4d26
DIFF: https://github.com/llvm/llvm-project/commit/dffa9dfbda56820c02e357ad34c24ce8759b4d26.diff

LOG: [clangd] Shard preamble symbols in dynamic index

Summary:
This reduces memory usage by dynamic index from more than 400MB to 32MB
when all files in clang-tools-extra/clangd/*.cpp are active in clangd.

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D77732

Added: 
    

Modified: 
    clang-tools-extra/clangd/index/Background.cpp
    clang-tools-extra/clangd/index/FileIndex.cpp
    clang-tools-extra/clangd/index/FileIndex.h
    clang-tools-extra/clangd/index/SymbolCollector.cpp
    clang-tools-extra/clangd/unittests/FileIndexTests.cpp
    clang-tools-extra/clangd/unittests/TestTU.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp
index 1c26dd48093e..4c5719d0526c 100644
--- a/clang-tools-extra/clangd/index/Background.cpp
+++ b/clang-tools-extra/clangd/index/Background.cpp
@@ -61,51 +61,6 @@ namespace clang {
 namespace clangd {
 namespace {
 
-// Resolves URI to file paths with cache.
-class URIToFileCache {
-public:
-  URIToFileCache(llvm::StringRef HintPath) : HintPath(HintPath) {}
-
-  llvm::StringRef resolve(llvm::StringRef FileURI) {
-    auto I = URIToPathCache.try_emplace(FileURI);
-    if (I.second) {
-      auto Path = URI::resolve(FileURI, HintPath);
-      if (!Path) {
-        elog("Failed to resolve URI {0}: {1}", FileURI, Path.takeError());
-        assert(false && "Failed to resolve URI");
-        return "";
-      }
-      I.first->second = *Path;
-    }
-    return I.first->second;
-  }
-
-private:
-  std::string HintPath;
-  llvm::StringMap<std::string> URIToPathCache;
-};
-
-// We keep only the node "U" and its edges. Any node other than "U" will be
-// empty in the resultant graph.
-IncludeGraph getSubGraph(const URI &U, const IncludeGraph &FullGraph) {
-  IncludeGraph IG;
-
-  std::string FileURI = U.toString();
-  auto Entry = IG.try_emplace(FileURI).first;
-  auto &Node = Entry->getValue();
-  Node = FullGraph.lookup(Entry->getKey());
-  Node.URI = Entry->getKey();
-
-  // URIs inside nodes must point into the keys of the same IncludeGraph.
-  for (auto &Include : Node.DirectIncludes) {
-    auto I = IG.try_emplace(Include).first;
-    I->getValue().URI = I->getKey();
-    Include = I->getKey();
-  }
-
-  return IG;
-}
-
 // We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or
 // relative to Cmd.Directory, which might not be the same as current working
 // directory.
@@ -219,108 +174,44 @@ void BackgroundIndex::update(
     llvm::StringRef MainFile, IndexFileIn Index,
     const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
     bool HadErrors) {
-  // Partition symbols/references into files.
-  struct File {
-    llvm::DenseSet<const Symbol *> Symbols;
-    llvm::DenseSet<const Ref *> Refs;
-    llvm::DenseSet<const Relation *> Relations;
-    FileDigest Digest;
-  };
-  llvm::StringMap<File> Files;
-  URIToFileCache URICache(MainFile);
+  llvm::StringMap<FileDigest> FilesToUpdate;
   for (const auto &IndexIt : *Index.Sources) {
     const auto &IGN = IndexIt.getValue();
     // Note that sources do not contain any information regarding missing
     // headers, since we don't even know what absolute path they should fall in.
-    const auto AbsPath = URICache.resolve(IGN.URI);
+    auto AbsPath = llvm::cantFail(URI::resolve(IGN.URI, MainFile),
+                                  "Failed to resovle URI");
     const auto DigestIt = ShardVersionsSnapshot.find(AbsPath);
     // File has 
diff erent contents, or indexing was successful this time.
     if (DigestIt == ShardVersionsSnapshot.end() ||
         DigestIt->getValue().Digest != IGN.Digest ||
         (DigestIt->getValue().HadErrors && !HadErrors))
-      Files.try_emplace(AbsPath).first->getValue().Digest = IGN.Digest;
-  }
-  // This map is used to figure out where to store relations.
-  llvm::DenseMap<SymbolID, File *> SymbolIDToFile;
-  for (const auto &Sym : *Index.Symbols) {
-    if (Sym.CanonicalDeclaration) {
-      auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI);
-      const auto FileIt = Files.find(DeclPath);
-      if (FileIt != Files.end()) {
-        FileIt->second.Symbols.insert(&Sym);
-        SymbolIDToFile[Sym.ID] = &FileIt->second;
-      }
-    }
-    // For symbols with 
diff erent declaration and definition locations, we store
-    // the full symbol in both the header file and the implementation file, so
-    // that merging can tell the preferred symbols (from canonical headers) from
-    // other symbols (e.g. forward declarations).
-    if (Sym.Definition &&
-        Sym.Definition.FileURI != Sym.CanonicalDeclaration.FileURI) {
-      auto DefPath = URICache.resolve(Sym.Definition.FileURI);
-      const auto FileIt = Files.find(DefPath);
-      if (FileIt != Files.end())
-        FileIt->second.Symbols.insert(&Sym);
-    }
-  }
-  llvm::DenseMap<const Ref *, SymbolID> RefToIDs;
-  for (const auto &SymRefs : *Index.Refs) {
-    for (const auto &R : SymRefs.second) {
-      auto Path = URICache.resolve(R.Location.FileURI);
-      const auto FileIt = Files.find(Path);
-      if (FileIt != Files.end()) {
-        auto &F = FileIt->getValue();
-        RefToIDs[&R] = SymRefs.first;
-        F.Refs.insert(&R);
-      }
-    }
-  }
-  for (const auto &Rel : *Index.Relations) {
-    const auto FileIt = SymbolIDToFile.find(Rel.Subject);
-    if (FileIt != SymbolIDToFile.end())
-      FileIt->second->Relations.insert(&Rel);
+      FilesToUpdate[AbsPath] = IGN.Digest;
   }
 
-  // Build and store new slabs for each updated file.
-  for (const auto &FileIt : Files) {
-    llvm::StringRef Path = FileIt.getKey();
-    SymbolSlab::Builder Syms;
-    RefSlab::Builder Refs;
-    RelationSlab::Builder Relations;
-    for (const auto *S : FileIt.second.Symbols)
-      Syms.insert(*S);
-    for (const auto *R : FileIt.second.Refs)
-      Refs.insert(RefToIDs[R], *R);
-    for (const auto *Rel : FileIt.second.Relations)
-      Relations.insert(*Rel);
-    auto SS = std::make_unique<SymbolSlab>(std::move(Syms).build());
-    auto RS = std::make_unique<RefSlab>(std::move(Refs).build());
-    auto RelS = std::make_unique<RelationSlab>(std::move(Relations).build());
-    auto IG = std::make_unique<IncludeGraph>(
-        getSubGraph(URI::create(Path), Index.Sources.getValue()));
+  // Shard slabs into files.
+  FileShardedIndex ShardedIndex(std::move(Index), MainFile);
 
-    // We need to store shards before updating the index, since the latter
-    // consumes slabs.
-    // FIXME: Also skip serializing the shard if it is already up-to-date.
-    BackgroundIndexStorage *IndexStorage = IndexStorageFactory(Path);
-    IndexFileOut Shard;
-    Shard.Symbols = SS.get();
-    Shard.Refs = RS.get();
-    Shard.Relations = RelS.get();
-    Shard.Sources = IG.get();
+  // Build and store new slabs for each updated file.
+  for (const auto &FileIt : FilesToUpdate) {
+    PathRef Path = FileIt.first();
+    auto IF = ShardedIndex.getShard(Path);
 
     // Only store command line hash for main files of the TU, since our
     // current model keeps only one version of a header file.
-    if (Path == MainFile)
-      Shard.Cmd = Index.Cmd.getPointer();
+    if (Path != MainFile)
+      IF.Cmd.reset();
 
-    if (auto Error = IndexStorage->storeShard(Path, Shard))
+    // We need to store shards before updating the index, since the latter
+    // consumes slabs.
+    // FIXME: Also skip serializing the shard if it is already up-to-date.
+    if (auto Error = IndexStorageFactory(Path)->storeShard(Path, IF))
       elog("Failed to write background-index shard for file {0}: {1}", Path,
            std::move(Error));
 
     {
       std::lock_guard<std::mutex> Lock(ShardVersionsMu);
-      auto Hash = FileIt.second.Digest;
+      const auto &Hash = FileIt.getValue();
       auto DigestIt = ShardVersions.try_emplace(Path);
       ShardVersion &SV = DigestIt.first->second;
       // Skip if file is already up to date, unless previous index was broken
@@ -333,8 +224,11 @@ void BackgroundIndex::update(
       // This can override a newer version that is added in another thread, if
       // this thread sees the older version but finishes later. This should be
       // rare in practice.
-      IndexedSymbols.update(Path, std::move(SS), std::move(RS), std::move(RelS),
-                            Path == MainFile);
+      IndexedSymbols.update(
+          Path, std::make_unique<SymbolSlab>(std::move(*IF.Symbols)),
+          std::make_unique<RefSlab>(std::move(*IF.Refs)),
+          std::make_unique<RelationSlab>(std::move(*IF.Relations)),
+          Path == MainFile);
     }
   }
 }

diff  --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp
index 92e7316105c7..d644268b8da9 100644
--- a/clang-tools-extra/clangd/index/FileIndex.cpp
+++ b/clang-tools-extra/clangd/index/FileIndex.cpp
@@ -10,11 +10,17 @@
 #include "CollectMacros.h"
 #include "Logger.h"
 #include "ParsedAST.h"
+#include "Path.h"
 #include "SymbolCollector.h"
 #include "index/CanonicalIncludes.h"
 #include "index/Index.h"
 #include "index/MemIndex.h"
 #include "index/Merge.h"
+#include "index/Ref.h"
+#include "index/Relation.h"
+#include "index/Serialization.h"
+#include "index/Symbol.h"
+#include "index/SymbolID.h"
 #include "index/SymbolOrigin.h"
 #include "index/dex/Dex.h"
 #include "clang/AST/ASTContext.h"
@@ -23,19 +29,24 @@
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
 #include <memory>
+#include <tuple>
+#include <utility>
+#include <vector>
 
 namespace clang {
 namespace clangd {
+namespace {
 
-static SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
-                              llvm::ArrayRef<Decl *> DeclsToIndex,
-                              const MainFileMacros *MacroRefsToIndex,
-                              const CanonicalIncludes &Includes,
-                              bool IsIndexMainAST, llvm::StringRef Version) {
+SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
+                       llvm::ArrayRef<Decl *> DeclsToIndex,
+                       const MainFileMacros *MacroRefsToIndex,
+                       const CanonicalIncludes &Includes, bool IsIndexMainAST,
+                       llvm::StringRef Version) {
   SymbolCollector::Options CollectorOpts;
   CollectorOpts.CollectIncludePath = true;
   CollectorOpts.Includes = &Includes;
@@ -85,6 +96,131 @@ static SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
                          std::move(Relations));
 }
 
+// Resolves URI to file paths with cache.
+class URIToFileCache {
+public:
+  URIToFileCache(PathRef HintPath) : HintPath(HintPath) {}
+
+  llvm::StringRef operator[](llvm::StringRef FileURI) {
+    if (FileURI.empty())
+      return "";
+    auto I = URIToPathCache.try_emplace(FileURI);
+    if (I.second) {
+      I.first->second = llvm::cantFail(URI::resolve(FileURI, HintPath),
+                                       "Failed to resolve URI");
+    }
+    return I.first->second;
+  }
+
+private:
+  PathRef HintPath;
+  llvm::StringMap<std::string> URIToPathCache;
+};
+
+// We keep only the node "U" and its edges. Any node other than "U" will be
+// empty in the resultant graph.
+IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) {
+  IncludeGraph IG;
+
+  auto Entry = IG.try_emplace(URI).first;
+  auto &Node = Entry->getValue();
+  Node = FullGraph.lookup(Entry->getKey());
+  Node.URI = Entry->getKey();
+
+  // URIs inside nodes must point into the keys of the same IncludeGraph.
+  for (auto &Include : Node.DirectIncludes) {
+    auto I = IG.try_emplace(Include).first;
+    I->getValue().URI = I->getKey();
+    Include = I->getKey();
+  }
+  return IG;
+}
+} // namespace
+
+FileShardedIndex::FileShardedIndex(IndexFileIn Input, PathRef HintPath)
+    : Index(std::move(Input)) {
+  URIToFileCache UriToFile(HintPath);
+  // Used to build RelationSlabs.
+  llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile;
+
+  // Attribute each Symbol to both their declaration and definition locations.
+  if (Index.Symbols) {
+    for (const auto &S : *Index.Symbols) {
+      auto File = UriToFile[S.CanonicalDeclaration.FileURI];
+      auto It = Shards.try_emplace(File);
+      It.first->getValue().Symbols.insert(&S);
+      SymbolIDToFile[S.ID] = &It.first->getValue();
+      // Only bother if definition file is 
diff erent than declaration file.
+      if (S.Definition &&
+          S.Definition.FileURI != S.CanonicalDeclaration.FileURI) {
+        auto File = UriToFile[S.Definition.FileURI];
+        auto It = Shards.try_emplace(File);
+        It.first->getValue().Symbols.insert(&S);
+      }
+    }
+  }
+  // Attribute references into each file they occured in.
+  if (Index.Refs) {
+    for (const auto &SymRefs : *Index.Refs) {
+      for (const auto &R : SymRefs.second) {
+        auto File = UriToFile[R.Location.FileURI];
+        const auto It = Shards.try_emplace(File);
+        It.first->getValue().Refs.insert(&R);
+        RefToSymID[&R] = SymRefs.first;
+      }
+    }
+  }
+  // Attribute relations to the file declaraing their Subject as Object might
+  // not have been indexed, see SymbolCollector::processRelations for details.
+  if (Index.Relations) {
+    for (const auto &R : *Index.Relations) {
+      auto *File = SymbolIDToFile.lookup(R.Subject);
+      assert(File && "unknown subject in relation");
+      File->Relations.insert(&R);
+    }
+  }
+  // Store only the direct includes of a file in a shard.
+  if (Index.Sources) {
+    const auto &FullGraph = *Index.Sources;
+    for (const auto &It : FullGraph) {
+      auto File = UriToFile[It.first()];
+      auto ShardIt = Shards.try_emplace(File);
+      ShardIt.first->getValue().IG = getSubGraph(It.first(), FullGraph);
+    }
+  }
+}
+std::vector<PathRef> FileShardedIndex::getAllFiles() const {
+  return {Shards.keys().begin(), Shards.keys().end()};
+}
+
+IndexFileIn FileShardedIndex::getShard(PathRef File) const {
+  auto It = Shards.find(File);
+  assert(It != Shards.end() && "received unknown file");
+
+  IndexFileIn IF;
+  IF.Sources = It->getValue().IG;
+  IF.Cmd = Index.Cmd;
+
+  SymbolSlab::Builder SymB;
+  for (const auto *S : It->getValue().Symbols)
+    SymB.insert(*S);
+  IF.Symbols = std::move(SymB).build();
+
+  RefSlab::Builder RefB;
+  for (const auto *Ref : It->getValue().Refs) {
+    auto SID = RefToSymID.lookup(Ref);
+    RefB.insert(SID, *Ref);
+  }
+  IF.Refs = std::move(RefB).build();
+
+  RelationSlab::Builder RelB;
+  for (const auto *Rel : It->getValue().Relations) {
+    RelB.insert(*Rel);
+  }
+  IF.Relations = std::move(RelB).build();
+  return IF;
+}
+
 SlabTuple indexMainDecls(ParsedAST &AST) {
   return indexSymbols(AST.getASTContext(), AST.getPreprocessorPtr(),
                       AST.getLocalTopLevelDecls(), &AST.getMacros(),
@@ -254,15 +390,24 @@ void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
                                ASTContext &AST,
                                std::shared_ptr<Preprocessor> PP,
                                const CanonicalIncludes &Includes) {
-  auto Slabs = indexHeaderSymbols(Version, AST, std::move(PP), Includes);
-  PreambleSymbols.update(
-      Path, std::make_unique<SymbolSlab>(std::move(std::get<0>(Slabs))),
-      std::make_unique<RefSlab>(),
-      std::make_unique<RelationSlab>(std::move(std::get<2>(Slabs))),
-      /*CountReferences=*/false);
+  IndexFileIn IF;
+  std::tie(IF.Symbols, std::ignore, IF.Relations) =
+      indexHeaderSymbols(Version, AST, std::move(PP), Includes);
+  FileShardedIndex ShardedIndex(std::move(IF), Path);
+  for (PathRef File : ShardedIndex.getAllFiles()) {
+    auto IF = ShardedIndex.getShard(File);
+    PreambleSymbols.update(
+        File, std::make_unique<SymbolSlab>(std::move(*IF.Symbols)),
+        std::make_unique<RefSlab>(),
+        std::make_unique<RelationSlab>(std::move(*IF.Relations)),
+        /*CountReferences=*/false);
+  }
   PreambleIndex.reset(
       PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light,
                                  DuplicateHandling::PickOne));
+  vlog("Build dynamic index for header symbols with estimated memory usage of "
+       "{0} bytes",
+       PreambleIndex.estimateMemoryUsage());
 }
 
 void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
@@ -274,6 +419,9 @@ void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
       /*CountReferences=*/true);
   MainFileIndex.reset(
       MainFileSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge));
+  vlog("Build dynamic index for main-file symbols with estimated memory usage "
+       "of {0} bytes",
+       MainFileIndex.estimateMemoryUsage());
 }
 
 } // namespace clangd

diff  --git a/clang-tools-extra/clangd/index/FileIndex.h b/clang-tools-extra/clangd/index/FileIndex.h
index ec44be9b89c2..539f232a7523 100644
--- a/clang-tools-extra/clangd/index/FileIndex.h
+++ b/clang-tools-extra/clangd/index/FileIndex.h
@@ -15,14 +15,24 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
 
+#include "Headers.h"
 #include "Index.h"
 #include "MemIndex.h"
 #include "Merge.h"
 #include "Path.h"
 #include "index/CanonicalIncludes.h"
+#include "index/Ref.h"
+#include "index/Relation.h"
+#include "index/Serialization.h"
 #include "index/Symbol.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include <memory>
+#include <vector>
 
 namespace clang {
 class ASTContext;
@@ -109,17 +119,15 @@ class FileIndex : public MergedIndex {
 private:
   bool UseDex; // FIXME: this should be always on.
 
-  // Contains information from each file's preamble only.
-  // These are large, but update fairly infrequently (preambles are stable).
+  // Contains information from each file's preamble only. Symbols and relations
+  // are sharded per declaration file to deduplicate multiple symbols and reduce
+  // memory usage.
   // Missing information:
   //  - symbol refs (these are always "from the main file")
   //  - definition locations in the main file
   //
-  // FIXME: Because the preambles for 
diff erent TUs have large overlap and
-  // FileIndex doesn't deduplicate, this uses lots of extra RAM.
-  // The biggest obstacle in fixing this: the obvious approach of partitioning
-  // by declaring file (rather than main file) fails if headers provide
-  // 
diff erent symbols based on preprocessor state.
+  // Note that we store only one version of a header, hence symbols appearing in
+  // 
diff erent PP states will be missing.
   FileSymbols PreambleSymbols;
   SwapIndex PreambleIndex;
 
@@ -146,6 +154,43 @@ SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
                              std::shared_ptr<Preprocessor> PP,
                              const CanonicalIncludes &Includes);
 
+/// Takes slabs coming from a TU (multiple files) and shards them per
+/// declaration location.
+struct FileShardedIndex {
+  /// \p HintPath is used to convert file URIs stored in symbols into absolute
+  /// paths.
+  explicit FileShardedIndex(IndexFileIn Input, PathRef HintPath);
+
+  /// Returns absolute paths for all files that has a shard.
+  std::vector<PathRef> getAllFiles() const;
+
+  /// Generates index shard for the \p File. Note that this function results in
+  /// a copy of all the relevant data.
+  /// Returned index will always have Symbol/Refs/Relation Slabs set, even if
+  /// they are empty.
+  IndexFileIn getShard(PathRef File) const;
+
+private:
+  // Contains all the information that belongs to a single file.
+  struct FileShard {
+    // Either declared or defined in the file.
+    llvm::DenseSet<const Symbol *> Symbols;
+    // Reference occurs in the file.
+    llvm::DenseSet<const Ref *> Refs;
+    // Subject is declared in the file.
+    llvm::DenseSet<const Relation *> Relations;
+    // Contains edges for only the direct includes.
+    IncludeGraph IG;
+  };
+
+  // Keeps all the information alive.
+  const IndexFileIn Index;
+  // Mapping from absolute paths to slab information.
+  llvm::StringMap<FileShard> Shards;
+  // Used to build RefSlabs.
+  llvm::DenseMap<const Ref *, SymbolID> RefToSymID;
+};
+
 } // namespace clangd
 } // namespace clang
 

diff  --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index 06190914ee8d..471061672107 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -283,6 +283,18 @@ bool SymbolCollector::handleDeclOccurrence(
   if (!ID)
     return true;
 
+  // ND is the canonical (i.e. first) declaration. If it's in the main file
+  // (which is not a header), then no public declaration was visible, so assume
+  // it's main-file only.
+  bool IsMainFileOnly =
+      SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
+      !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
+                    ASTCtx->getLangOpts());
+  // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
+  if (ASTNode.OrigD->isImplicit() ||
+      !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
+    return true;
+
   // Note: we need to process relations for all decl occurrences, including
   // refs, because the indexing code only populates relations for specific
   // occurrences. For example, RelationBaseOf is only populated for the
@@ -297,17 +309,6 @@ bool SymbolCollector::handleDeclOccurrence(
   if (IsOnlyRef && !CollectRef)
     return true;
 
-  // ND is the canonical (i.e. first) declaration. If it's in the main file
-  // (which is not a header), then no public declaration was visible, so assume
-  // it's main-file only.
-  bool IsMainFileOnly =
-      SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
-      !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
-                    ASTCtx->getLangOpts());
-  // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
-  if (ASTNode.OrigD->isImplicit() ||
-      !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
-    return true;
   // Do not store references to main-file symbols.
   // Unlike other fields, e.g. Symbols (which use spelling locations), we use
   // file locations for references (as it aligns the behavior of clangd's

diff  --git a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
index 5203fb67307c..dc39ad2acf25 100644
--- a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
@@ -9,13 +9,19 @@
 #include "AST.h"
 #include "Annotations.h"
 #include "Compiler.h"
+#include "Headers.h"
 #include "ParsedAST.h"
 #include "SyncAPI.h"
 #include "TestFS.h"
 #include "TestTU.h"
+#include "URI.h"
 #include "index/CanonicalIncludes.h"
 #include "index/FileIndex.h"
 #include "index/Index.h"
+#include "index/Ref.h"
+#include "index/Relation.h"
+#include "index/Serialization.h"
+#include "index/Symbol.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/Utils.h"
 #include "clang/Index/IndexSymbol.h"
@@ -23,6 +29,7 @@
 #include "clang/Tooling/CompilationDatabase.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include <utility>
 
 using ::testing::_;
 using ::testing::AllOf;
@@ -151,8 +158,9 @@ void update(FileIndex &M, llvm::StringRef Basename, llvm::StringRef Code) {
   File.HeaderFilename = (Basename + ".h").str();
   File.HeaderCode = std::string(Code);
   auto AST = File.build();
-  M.updatePreamble(File.Filename, /*Version=*/"null", AST.getASTContext(),
-                   AST.getPreprocessorPtr(), AST.getCanonicalIncludes());
+  M.updatePreamble(testPath(File.Filename), /*Version=*/"null",
+                   AST.getASTContext(), AST.getPreprocessorPtr(),
+                   AST.getCanonicalIncludes());
 }
 
 TEST(FileIndexTest, CustomizedURIScheme) {
@@ -393,8 +401,9 @@ TEST(FileIndexTest, Relations) {
   TU.HeaderCode = "class A {}; class B : public A {};";
   auto AST = TU.build();
   FileIndex Index;
-  Index.updatePreamble(TU.Filename, /*Version=*/"null", AST.getASTContext(),
-                       AST.getPreprocessorPtr(), AST.getCanonicalIncludes());
+  Index.updatePreamble(testPath(TU.Filename), /*Version=*/"null",
+                       AST.getASTContext(), AST.getPreprocessorPtr(),
+                       AST.getCanonicalIncludes());
   SymbolID A = findSymbol(TU.headerSymbols(), "A").ID;
   uint32_t Results = 0;
   RelationsRequest Req;
@@ -477,6 +486,128 @@ TEST(FileSymbolsTest, CountReferencesWithRefSlabs) {
                            AllOf(QName("2"), NumReferences(1u)),
                            AllOf(QName("3"), NumReferences(1u))));
 }
+
+TEST(FileIndexTest, StalePreambleSymbolsDeleted) {
+  FileIndex M;
+  TestTU File;
+  File.HeaderFilename = "a.h";
+
+  File.Filename = "f1.cpp";
+  File.HeaderCode = "int a;";
+  auto AST = File.build();
+  M.updatePreamble(testPath(File.Filename), /*Version=*/"null",
+                   AST.getASTContext(), AST.getPreprocessorPtr(),
+                   AST.getCanonicalIncludes());
+  EXPECT_THAT(runFuzzyFind(M, ""), UnorderedElementsAre(QName("a")));
+
+  File.Filename = "f2.cpp";
+  File.HeaderCode = "int b;";
+  AST = File.build();
+  M.updatePreamble(testPath(File.Filename), /*Version=*/"null",
+                   AST.getASTContext(), AST.getPreprocessorPtr(),
+                   AST.getCanonicalIncludes());
+  EXPECT_THAT(runFuzzyFind(M, ""), UnorderedElementsAre(QName("b")));
+}
+
+TEST(FileShardedIndexTest, Sharding) {
+  auto AHeaderUri = URI::create(testPath("a.h")).toString();
+  auto BHeaderUri = URI::create(testPath("b.h")).toString();
+  auto BSourceUri = URI::create(testPath("b.cc")).toString();
+
+  auto Sym1 = symbol("1");
+  Sym1.CanonicalDeclaration.FileURI = AHeaderUri.c_str();
+
+  auto Sym2 = symbol("2");
+  Sym2.CanonicalDeclaration.FileURI = BHeaderUri.c_str();
+  Sym2.Definition.FileURI = BSourceUri.c_str();
+
+  IndexFileIn IF;
+  {
+    SymbolSlab::Builder B;
+    // Should be stored in only a.h
+    B.insert(Sym1);
+    // Should be stored in both b.h and b.cc
+    B.insert(Sym2);
+    IF.Symbols = std::move(B).build();
+  }
+  {
+    // Should be stored in b.cc
+    IF.Refs = std::move(*refSlab(Sym1.ID, BSourceUri.c_str()).release());
+  }
+  {
+    RelationSlab::Builder B;
+    // Should be stored in a.h
+    B.insert(Relation{Sym1.ID, RelationKind::BaseOf, Sym2.ID});
+    // Should be stored in b.h
+    B.insert(Relation{Sym2.ID, RelationKind::BaseOf, Sym1.ID});
+    IF.Relations = std::move(B).build();
+  }
+
+  IF.Sources.emplace();
+  IncludeGraph &IG = *IF.Sources;
+  {
+    // b.cc includes b.h
+    auto &Node = IG[BSourceUri];
+    Node.DirectIncludes = {BHeaderUri};
+    Node.URI = BSourceUri;
+  }
+  {
+    // b.h includes a.h
+    auto &Node = IG[BHeaderUri];
+    Node.DirectIncludes = {AHeaderUri};
+    Node.URI = BHeaderUri;
+  }
+  {
+    // a.h includes nothing.
+    auto &Node = IG[AHeaderUri];
+    Node.DirectIncludes = {};
+    Node.URI = AHeaderUri;
+  }
+
+  IF.Cmd = tooling::CompileCommand(testRoot(), "b.cc", {"clang"}, "out");
+
+  FileShardedIndex ShardedIndex(std::move(IF), testPath("b.cc"));
+  ASSERT_THAT(
+      ShardedIndex.getAllFiles(),
+      UnorderedElementsAre(testPath("a.h"), testPath("b.h"), testPath("b.cc")));
+
+  {
+    auto Shard = ShardedIndex.getShard(testPath("a.h"));
+    EXPECT_THAT(Shard.Symbols.getValue(), UnorderedElementsAre(QName("1")));
+    EXPECT_THAT(Shard.Refs.getValue(), IsEmpty());
+    EXPECT_THAT(
+        Shard.Relations.getValue(),
+        UnorderedElementsAre(Relation{Sym1.ID, RelationKind::BaseOf, Sym2.ID}));
+    ASSERT_THAT(Shard.Sources.getValue().keys(),
+                UnorderedElementsAre(AHeaderUri));
+    EXPECT_THAT(Shard.Sources->lookup(AHeaderUri).DirectIncludes, IsEmpty());
+    EXPECT_TRUE(Shard.Cmd.hasValue());
+  }
+  {
+    auto Shard = ShardedIndex.getShard(testPath("b.h"));
+    EXPECT_THAT(Shard.Symbols.getValue(), UnorderedElementsAre(QName("2")));
+    EXPECT_THAT(Shard.Refs.getValue(), IsEmpty());
+    EXPECT_THAT(
+        Shard.Relations.getValue(),
+        UnorderedElementsAre(Relation{Sym2.ID, RelationKind::BaseOf, Sym1.ID}));
+    ASSERT_THAT(Shard.Sources.getValue().keys(),
+                UnorderedElementsAre(BHeaderUri, AHeaderUri));
+    EXPECT_THAT(Shard.Sources->lookup(BHeaderUri).DirectIncludes,
+                UnorderedElementsAre(AHeaderUri));
+    EXPECT_TRUE(Shard.Cmd.hasValue());
+  }
+  {
+    auto Shard = ShardedIndex.getShard(testPath("b.cc"));
+    EXPECT_THAT(Shard.Symbols.getValue(), UnorderedElementsAre(QName("2")));
+    EXPECT_THAT(Shard.Refs.getValue(), UnorderedElementsAre(Pair(Sym1.ID, _)));
+    EXPECT_THAT(Shard.Relations.getValue(), IsEmpty());
+    ASSERT_THAT(Shard.Sources.getValue().keys(),
+                UnorderedElementsAre(BSourceUri, BHeaderUri));
+    EXPECT_THAT(Shard.Sources->lookup(BSourceUri).DirectIncludes,
+                UnorderedElementsAre(BHeaderUri));
+    EXPECT_TRUE(Shard.Cmd.hasValue());
+  }
+}
 } // namespace
 } // namespace clangd
 } // namespace clang

diff  --git a/clang-tools-extra/clangd/unittests/TestTU.cpp b/clang-tools-extra/clangd/unittests/TestTU.cpp
index fe3e8f01cca8..640d067261bd 100644
--- a/clang-tools-extra/clangd/unittests/TestTU.cpp
+++ b/clang-tools-extra/clangd/unittests/TestTU.cpp
@@ -116,9 +116,10 @@ SymbolSlab TestTU::headerSymbols() const {
 std::unique_ptr<SymbolIndex> TestTU::index() const {
   auto AST = build();
   auto Idx = std::make_unique<FileIndex>(/*UseDex=*/true);
-  Idx->updatePreamble(Filename, /*Version=*/"null", AST.getASTContext(),
-                      AST.getPreprocessorPtr(), AST.getCanonicalIncludes());
-  Idx->updateMain(Filename, AST);
+  Idx->updatePreamble(testPath(Filename), /*Version=*/"null",
+                      AST.getASTContext(), AST.getPreprocessorPtr(),
+                      AST.getCanonicalIncludes());
+  Idx->updateMain(testPath(Filename), AST);
   return std::move(Idx);
 }
 


        


More information about the cfe-commits mailing list