[clang-tools-extra] 97c407d - [clangd] Make use of URIs in FileShardedIndex

Kadir Cetinkaya via cfe-commits cfe-commits at lists.llvm.org
Thu Apr 30 00:50:51 PDT 2020


Author: Kadir Cetinkaya
Date: 2020-04-30T09:47:59+02:00
New Revision: 97c407db772521cd69a86433672f981a13c0abb2

URL: https://github.com/llvm/llvm-project/commit/97c407db772521cd69a86433672f981a13c0abb2
DIFF: https://github.com/llvm/llvm-project/commit/97c407db772521cd69a86433672f981a13c0abb2.diff

LOG: [clangd] Make use of URIs in FileShardedIndex

Summary:
This makes FileShardedIndex more robust and gets rid of the need for a
URIToFileCache, as it is done by the callers now and it is only once per file,
rather than once per symbol.

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D79079

Added: 
    

Modified: 
    clang-tools-extra/clangd/index/Background.cpp
    clang-tools-extra/clangd/index/FileIndex.cpp
    clang-tools-extra/clangd/index/FileIndex.h
    clang-tools-extra/clangd/unittests/FileIndexTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp
index 67fed77034f1..a46ba18f5b6c 100644
--- a/clang-tools-extra/clangd/index/Background.cpp
+++ b/clang-tools-extra/clangd/index/Background.cpp
@@ -174,28 +174,34 @@ void BackgroundIndex::update(
     llvm::StringRef MainFile, IndexFileIn Index,
     const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
     bool HadErrors) {
-  llvm::StringMap<FileDigest> FilesToUpdate;
+  // Keys are URIs.
+  llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate;
+  // Note that sources do not contain any information regarding missing headers,
+  // since we don't even know what absolute path they should fall in.
   for (const auto &IndexIt : *Index.Sources) {
     const auto &IGN = IndexIt.getValue();
-    // Note that sources do not contain any information regarding missing
-    // headers, since we don't even know what absolute path they should fall in.
-    auto AbsPath = llvm::cantFail(URI::resolve(IGN.URI, MainFile),
-                                  "Failed to resovle URI");
-    const auto DigestIt = ShardVersionsSnapshot.find(AbsPath);
+    auto AbsPath = URI::resolve(IGN.URI, MainFile);
+    if (!AbsPath) {
+      elog("Failed to resolve URI: {0}", AbsPath.takeError());
+      continue;
+    }
+    const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath);
     // File has 
diff erent contents, or indexing was successful this time.
     if (DigestIt == ShardVersionsSnapshot.end() ||
         DigestIt->getValue().Digest != IGN.Digest ||
         (DigestIt->getValue().HadErrors && !HadErrors))
-      FilesToUpdate[AbsPath] = IGN.Digest;
+      FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest};
   }
 
   // Shard slabs into files.
-  FileShardedIndex ShardedIndex(std::move(Index), MainFile);
+  FileShardedIndex ShardedIndex(std::move(Index));
 
   // Build and store new slabs for each updated file.
   for (const auto &FileIt : FilesToUpdate) {
-    PathRef Path = FileIt.first();
-    auto IF = ShardedIndex.getShard(Path);
+    auto Uri = FileIt.first();
+    // ShardedIndex should always have a shard for a file in Index.Sources.
+    auto IF = ShardedIndex.getShard(Uri).getValue();
+    PathRef Path = FileIt.getValue().first;
 
     // Only store command line hash for main files of the TU, since our
     // current model keeps only one version of a header file.
@@ -211,7 +217,7 @@ void BackgroundIndex::update(
 
     {
       std::lock_guard<std::mutex> Lock(ShardVersionsMu);
-      const auto &Hash = FileIt.getValue();
+      const auto &Hash = FileIt.getValue().second;
       auto DigestIt = ShardVersions.try_emplace(Path);
       ShardVersion &SV = DigestIt.first->second;
       // Skip if file is already up to date, unless previous index was broken

diff  --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp
index 6a94b9c1a696..bf236bd8411b 100644
--- a/clang-tools-extra/clangd/index/FileIndex.cpp
+++ b/clang-tools-extra/clangd/index/FileIndex.cpp
@@ -29,6 +29,7 @@
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
@@ -96,27 +97,6 @@ SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
                          std::move(Relations));
 }
 
-// Resolves URI to file paths with cache.
-class URIToFileCache {
-public:
-  URIToFileCache(PathRef HintPath) : HintPath(HintPath) {}
-
-  llvm::StringRef operator[](llvm::StringRef FileURI) {
-    if (FileURI.empty())
-      return "";
-    auto I = URIToPathCache.try_emplace(FileURI);
-    if (I.second) {
-      I.first->second = llvm::cantFail(URI::resolve(FileURI, HintPath),
-                                       "Failed to resolve URI");
-    }
-    return I.first->second;
-  }
-
-private:
-  PathRef HintPath;
-  llvm::StringMap<std::string> URIToPathCache;
-};
-
 // We keep only the node "U" and its edges. Any node other than "U" will be
 // empty in the resultant graph.
 IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) {
@@ -137,24 +117,21 @@ IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) {
 }
 } // namespace
 
-FileShardedIndex::FileShardedIndex(IndexFileIn Input, PathRef HintPath)
+FileShardedIndex::FileShardedIndex(IndexFileIn Input)
     : Index(std::move(Input)) {
-  URIToFileCache UriToFile(HintPath);
   // Used to build RelationSlabs.
   llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile;
 
   // Attribute each Symbol to both their declaration and definition locations.
   if (Index.Symbols) {
     for (const auto &S : *Index.Symbols) {
-      auto File = UriToFile[S.CanonicalDeclaration.FileURI];
-      auto It = Shards.try_emplace(File);
+      auto It = Shards.try_emplace(S.CanonicalDeclaration.FileURI);
       It.first->getValue().Symbols.insert(&S);
       SymbolIDToFile[S.ID] = &It.first->getValue();
       // Only bother if definition file is 
diff erent than declaration file.
       if (S.Definition &&
           S.Definition.FileURI != S.CanonicalDeclaration.FileURI) {
-        auto File = UriToFile[S.Definition.FileURI];
-        auto It = Shards.try_emplace(File);
+        auto It = Shards.try_emplace(S.Definition.FileURI);
         It.first->getValue().Symbols.insert(&S);
       }
     }
@@ -163,8 +140,7 @@ FileShardedIndex::FileShardedIndex(IndexFileIn Input, PathRef HintPath)
   if (Index.Refs) {
     for (const auto &SymRefs : *Index.Refs) {
       for (const auto &R : SymRefs.second) {
-        auto File = UriToFile[R.Location.FileURI];
-        const auto It = Shards.try_emplace(File);
+        const auto It = Shards.try_emplace(R.Location.FileURI);
         It.first->getValue().Refs.insert(&R);
         RefToSymID[&R] = SymRefs.first;
       }
@@ -183,25 +159,26 @@ FileShardedIndex::FileShardedIndex(IndexFileIn Input, PathRef HintPath)
   if (Index.Sources) {
     const auto &FullGraph = *Index.Sources;
     for (const auto &It : FullGraph) {
-      auto File = UriToFile[It.first()];
-      auto ShardIt = Shards.try_emplace(File);
+      auto ShardIt = Shards.try_emplace(It.first());
       ShardIt.first->getValue().IG = getSubGraph(It.first(), FullGraph);
     }
   }
 }
-std::vector<PathRef> FileShardedIndex::getAllFiles() const {
+std::vector<llvm::StringRef> FileShardedIndex::getAllSources() const {
   // It should be enough to construct a vector with {Shards.keys().begin(),
   // Shards.keys().end()} but MSVC fails to compile that.
   std::vector<PathRef> Result;
   Result.reserve(Shards.size());
-  for (PathRef Key : Shards.keys())
+  for (auto Key : Shards.keys())
     Result.push_back(Key);
   return Result;
 }
 
-IndexFileIn FileShardedIndex::getShard(PathRef File) const {
-  auto It = Shards.find(File);
-  assert(It != Shards.end() && "received unknown file");
+llvm::Optional<IndexFileIn>
+FileShardedIndex::getShard(llvm::StringRef Uri) const {
+  auto It = Shards.find(Uri);
+  if (It == Shards.end())
+    return llvm::None;
 
   IndexFileIn IF;
   IF.Sources = It->getValue().IG;
@@ -245,27 +222,28 @@ SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
                       /*IsIndexMainAST=*/false, Version);
 }
 
-void FileSymbols::update(PathRef Path, std::unique_ptr<SymbolSlab> Symbols,
+void FileSymbols::update(llvm::StringRef Key,
+                         std::unique_ptr<SymbolSlab> Symbols,
                          std::unique_ptr<RefSlab> Refs,
                          std::unique_ptr<RelationSlab> Relations,
                          bool CountReferences) {
   std::lock_guard<std::mutex> Lock(Mutex);
   if (!Symbols)
-    FileToSymbols.erase(Path);
+    SymbolsSnapshot.erase(Key);
   else
-    FileToSymbols[Path] = std::move(Symbols);
+    SymbolsSnapshot[Key] = std::move(Symbols);
   if (!Refs) {
-    FileToRefs.erase(Path);
+    RefsSnapshot.erase(Key);
   } else {
     RefSlabAndCountReferences Item;
     Item.CountReferences = CountReferences;
     Item.Slab = std::move(Refs);
-    FileToRefs[Path] = std::move(Item);
+    RefsSnapshot[Key] = std::move(Item);
   }
   if (!Relations)
-    FileToRelations.erase(Path);
+    RelatiosSnapshot.erase(Key);
   else
-    FileToRelations[Path] = std::move(Relations);
+    RelatiosSnapshot[Key] = std::move(Relations);
 }
 
 std::unique_ptr<SymbolIndex>
@@ -276,14 +254,14 @@ FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle) {
   std::vector<RefSlab *> MainFileRefs;
   {
     std::lock_guard<std::mutex> Lock(Mutex);
-    for (const auto &FileAndSymbols : FileToSymbols)
+    for (const auto &FileAndSymbols : SymbolsSnapshot)
       SymbolSlabs.push_back(FileAndSymbols.second);
-    for (const auto &FileAndRefs : FileToRefs) {
+    for (const auto &FileAndRefs : RefsSnapshot) {
       RefSlabs.push_back(FileAndRefs.second.Slab);
       if (FileAndRefs.second.CountReferences)
         MainFileRefs.push_back(RefSlabs.back().get());
     }
-    for (const auto &FileAndRelations : FileToRelations)
+    for (const auto &FileAndRelations : RelatiosSnapshot)
       RelationSlabs.push_back(FileAndRelations.second);
   }
   std::vector<const Symbol *> AllSymbols;
@@ -399,11 +377,13 @@ void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
   IndexFileIn IF;
   std::tie(IF.Symbols, std::ignore, IF.Relations) =
       indexHeaderSymbols(Version, AST, std::move(PP), Includes);
-  FileShardedIndex ShardedIndex(std::move(IF), Path);
-  for (PathRef File : ShardedIndex.getAllFiles()) {
-    auto IF = ShardedIndex.getShard(File);
+  FileShardedIndex ShardedIndex(std::move(IF));
+  for (auto Uri : ShardedIndex.getAllSources()) {
+    // We are using the key received from ShardedIndex, so it should always
+    // exist.
+    auto IF = ShardedIndex.getShard(Uri).getValue();
     PreambleSymbols.update(
-        File, std::make_unique<SymbolSlab>(std::move(*IF.Symbols)),
+        Uri, std::make_unique<SymbolSlab>(std::move(*IF.Symbols)),
         std::make_unique<RefSlab>(),
         std::make_unique<RelationSlab>(std::move(*IF.Relations)),
         /*CountReferences=*/false);

diff  --git a/clang-tools-extra/clangd/index/FileIndex.h b/clang-tools-extra/clangd/index/FileIndex.h
index 5c8776659e8f..65a2c305dca5 100644
--- a/clang-tools-extra/clangd/index/FileIndex.h
+++ b/clang-tools-extra/clangd/index/FileIndex.h
@@ -55,30 +55,30 @@ enum class DuplicateHandling {
   Merge,
 };
 
-/// A container of Symbols from several source files. It can be updated
-/// at source-file granularity, replacing all symbols from one file with a new
-/// set.
+/// A container of slabs associated with a key. It can be updated at key
+/// granularity, replacing all slabs belonging to a key with a new set. Keys are
+/// usually file paths/uris.
 ///
-/// This implements a snapshot semantics for symbols in a file. Each update to a
-/// file will create a new snapshot for all symbols in the file. Snapshots are
-/// managed with shared pointers that are shared between this class and the
-/// users. For each file, this class only stores a pointer pointing to the
-/// newest snapshot, and an outdated snapshot is deleted by the last owner of
-/// the snapshot, either this class or the symbol index.
+/// This implements snapshot semantics. Each update will create a new snapshot
+/// for all slabs of the Key. Snapshots are managed with shared pointers that
+/// are shared between this class and the users. For each key, this class only
+/// stores a pointer pointing to the newest snapshot, and an outdated snapshot
+/// is deleted by the last owner of the snapshot, either this class or the
+/// symbol index.
 ///
 /// The snapshot semantics keeps critical sections minimal since we only need
 /// locking when we swap or obtain references to snapshots.
 class FileSymbols {
 public:
-  /// Updates all symbols and refs in a file.
-  /// If either is nullptr, corresponding data for \p Path will be removed.
-  /// If CountReferences is true, \p Refs will be used for counting References
+  /// Updates all slabs associated with the \p Key.
+  /// If either is nullptr, corresponding data for \p Key will be removed.
+  /// If CountReferences is true, \p Refs will be used for counting references
   /// during merging.
-  void update(PathRef Path, std::unique_ptr<SymbolSlab> Slab,
+  void update(llvm::StringRef Key, std::unique_ptr<SymbolSlab> Symbols,
               std::unique_ptr<RefSlab> Refs,
               std::unique_ptr<RelationSlab> Relations, bool CountReferences);
 
-  /// The index keeps the symbols alive.
+  /// The index keeps the slabs alive.
   /// Will count Symbol::References based on number of references in the main
   /// files, while building the index with DuplicateHandling::Merge option.
   std::unique_ptr<SymbolIndex>
@@ -92,12 +92,9 @@ class FileSymbols {
   };
   mutable std::mutex Mutex;
 
-  /// Stores the latest symbol snapshots for all active files.
-  llvm::StringMap<std::shared_ptr<SymbolSlab>> FileToSymbols;
-  /// Stores the latest ref snapshots for all active files.
-  llvm::StringMap<RefSlabAndCountReferences> FileToRefs;
-  /// Stores the latest relation snapshots for all active files.
-  llvm::StringMap<std::shared_ptr<RelationSlab>> FileToRelations;
+  llvm::StringMap<std::shared_ptr<SymbolSlab>> SymbolsSnapshot;
+  llvm::StringMap<RefSlabAndCountReferences> RefsSnapshot;
+  llvm::StringMap<std::shared_ptr<RelationSlab>> RelatiosSnapshot;
 };
 
 /// This manages symbols from files and an in-memory index on all symbols.
@@ -159,16 +156,16 @@ SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
 struct FileShardedIndex {
   /// \p HintPath is used to convert file URIs stored in symbols into absolute
   /// paths.
-  explicit FileShardedIndex(IndexFileIn Input, PathRef HintPath);
+  explicit FileShardedIndex(IndexFileIn Input);
 
-  /// Returns absolute paths for all files that has a shard.
-  std::vector<PathRef> getAllFiles() const;
+  /// Returns uris for all files that has a shard.
+  std::vector<llvm::StringRef> getAllSources() const;
 
-  /// Generates index shard for the \p File. Note that this function results in
+  /// Generates index shard for the \p Uri. Note that this function results in
   /// a copy of all the relevant data.
   /// Returned index will always have Symbol/Refs/Relation Slabs set, even if
   /// they are empty.
-  IndexFileIn getShard(PathRef File) const;
+  llvm::Optional<IndexFileIn> getShard(llvm::StringRef Uri) const;
 
 private:
   // Contains all the information that belongs to a single file.
@@ -185,7 +182,7 @@ struct FileShardedIndex {
 
   // Keeps all the information alive.
   const IndexFileIn Index;
-  // Mapping from absolute paths to slab information.
+  // Mapping from URIs to slab information.
   llvm::StringMap<FileShard> Shards;
   // Used to build RefSlabs.
   llvm::DenseMap<const Ref *, SymbolID> RefToSymID;

diff  --git a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
index 9631c920fb27..c0763cf0525e 100644
--- a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
@@ -568,13 +568,12 @@ TEST(FileShardedIndexTest, Sharding) {
 
   IF.Cmd = tooling::CompileCommand(testRoot(), "b.cc", {"clang"}, "out");
 
-  FileShardedIndex ShardedIndex(std::move(IF), testPath("b.cc"));
-  ASSERT_THAT(
-      ShardedIndex.getAllFiles(),
-      UnorderedElementsAre(testPath("a.h"), testPath("b.h"), testPath("b.cc")));
+  FileShardedIndex ShardedIndex(std::move(IF));
+  ASSERT_THAT(ShardedIndex.getAllSources(),
+              UnorderedElementsAre(AHeaderUri, BHeaderUri, BSourceUri));
 
   {
-    auto Shard = ShardedIndex.getShard(testPath("a.h"));
+    auto Shard = ShardedIndex.getShard(AHeaderUri).getValue();
     EXPECT_THAT(Shard.Symbols.getValue(), UnorderedElementsAre(QName("1")));
     EXPECT_THAT(Shard.Refs.getValue(), IsEmpty());
     EXPECT_THAT(
@@ -586,7 +585,7 @@ TEST(FileShardedIndexTest, Sharding) {
     EXPECT_TRUE(Shard.Cmd.hasValue());
   }
   {
-    auto Shard = ShardedIndex.getShard(testPath("b.h"));
+    auto Shard = ShardedIndex.getShard(BHeaderUri).getValue();
     EXPECT_THAT(Shard.Symbols.getValue(), UnorderedElementsAre(QName("2")));
     EXPECT_THAT(Shard.Refs.getValue(), IsEmpty());
     EXPECT_THAT(
@@ -599,7 +598,7 @@ TEST(FileShardedIndexTest, Sharding) {
     EXPECT_TRUE(Shard.Cmd.hasValue());
   }
   {
-    auto Shard = ShardedIndex.getShard(testPath("b.cc"));
+    auto Shard = ShardedIndex.getShard(BSourceUri).getValue();
     EXPECT_THAT(Shard.Symbols.getValue(), UnorderedElementsAre(QName("2")));
     EXPECT_THAT(Shard.Refs.getValue(), UnorderedElementsAre(Pair(Sym1.ID, _)));
     EXPECT_THAT(Shard.Relations.getValue(), IsEmpty());


        


More information about the cfe-commits mailing list