[clang-tools-extra] r346938 - Introduce shard storage to auto-index.

Kadir Cetinkaya via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 15 02:31:10 PST 2018


Author: kadircet
Date: Thu Nov 15 02:31:10 2018
New Revision: 346938

URL: http://llvm.org/viewvc/llvm-project?rev=346938&view=rev
Log:
Introduce shard storage to auto-index.

Reviewers: sammccall, ioeric

Subscribers: ilya-biryukov, jkorous, arphaman, cfe-commits

Differential Revision: https://reviews.llvm.org/D54269

Modified:
    clang-tools-extra/trunk/clangd/index/Background.cpp
    clang-tools-extra/trunk/clangd/index/Background.h
    clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp

Modified: clang-tools-extra/trunk/clangd/index/Background.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.cpp?rev=346938&r1=346937&r2=346938&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Background.cpp Thu Nov 15 02:31:10 2018
@@ -26,26 +26,52 @@
 #include "llvm/Support/SHA1.h"
 #include <random>
 #include <string>
+#include <queue>
+#include <memory>
 
 using namespace llvm;
 namespace clang {
 namespace clangd {
 
-BackgroundIndex::BackgroundIndex(Context BackgroundContext,
-                                 StringRef ResourceDir,
-                                 const FileSystemProvider &FSProvider,
-                                 ArrayRef<std::string> URISchemes,
-                                 size_t ThreadPoolSize)
+namespace {
+
+static BackgroundIndex::FileDigest digest(StringRef Content) {
+  return SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
+}
+
+static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
+                                                        FileID FID) {
+  bool Invalid = false;
+  StringRef Content = SM.getBufferData(FID, &Invalid);
+  if (Invalid)
+    return None;
+  return digest(Content);
+}
+
+llvm::SmallString<128>
+getShardPathFromFilePath(llvm::SmallString<128> ShardRoot,
+                         llvm::StringRef FilePath) {
+  sys::path::append(ShardRoot, sys::path::filename(FilePath) +
+                                   toHex(digest(FilePath)) + ".idx");
+  return ShardRoot;
+}
+
+} // namespace
+
+BackgroundIndex::BackgroundIndex(
+    Context BackgroundContext, StringRef ResourceDir,
+    const FileSystemProvider &FSProvider, ArrayRef<std::string> URISchemes,
+    std::unique_ptr<ShardStorage> IndexShardStorage, size_t ThreadPoolSize)
     : SwapIndex(make_unique<MemIndex>()), ResourceDir(ResourceDir),
       FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)),
-      URISchemes(URISchemes) {
+      URISchemes(URISchemes), IndexShardStorage(std::move(IndexShardStorage)) {
   assert(ThreadPoolSize > 0 && "Thread pool size can't be zero.");
   while (ThreadPoolSize--) {
     ThreadPool.emplace_back([this] { run(); });
     // Set priority to low, since background indexing is a long running task we
     // do not want to eat up cpu when there are any other high priority threads.
     // FIXME: In the future we might want a more general way of handling this to
-    // support a tasks with various priorities.
+    // support tasks with various priorities.
     setThreadPriority(ThreadPool.back(), ThreadPriority::Low);
   }
 }
@@ -123,6 +149,12 @@ void BackgroundIndex::enqueueAll(StringR
 }
 
 void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) {
+  // Initialize storage to project root. Since initialize() is a no-op after
+  // the first successful call, we can simply call it for each file.
+  if (IndexShardStorage && !IndexShardStorage->initialize(Cmd.Directory)) {
+    elog("Failed to initialize shard storage");
+    IndexShardStorage.reset();
+  }
   Queue.push_back(Bind(
       [this](tooling::CompileCommand Cmd) {
         std::string Filename = Cmd.Filename;
@@ -133,19 +165,6 @@ void BackgroundIndex::enqueueLocked(tool
       std::move(Cmd)));
 }
 
-static BackgroundIndex::FileDigest digest(StringRef Content) {
-  return SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
-}
-
-static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
-                                                        FileID FID) {
-  bool Invalid = false;
-  StringRef Content = SM.getBufferData(FID, &Invalid);
-  if (Invalid)
-    return None;
-  return digest(Content);
-}
-
 // Resolves URI to file paths with cache.
 class URIToFileCache {
 public:
@@ -227,14 +246,25 @@ void BackgroundIndex::update(StringRef M
     for (const auto *R : F.second.Refs)
       Refs.insert(RefToIDs[R], *R);
 
+    auto SS = llvm::make_unique<SymbolSlab>(std::move(Syms).build());
+    auto RS = llvm::make_unique<RefSlab>(std::move(Refs).build());
+
+    auto Hash = FilesToUpdate.lookup(Path);
+    // Put shards into storage for subsequent use.
+    // FIXME: Store Hash in the Shard.
+    if (IndexShardStorage) {
+      IndexFileOut Shard;
+      Shard.Symbols = SS.get();
+      Shard.Refs = RS.get();
+      IndexShardStorage->storeShard(Path, Shard);
+    }
+
     std::lock_guard<std::mutex> Lock(DigestsMu);
     // This can override a newer version that is added in another thread,
     // if this thread sees the older version but finishes later. This should be
     // rare in practice.
-    IndexedFileDigests[Path] = FilesToUpdate.lookup(Path);
-    IndexedSymbols.update(Path,
-                          make_unique<SymbolSlab>(std::move(Syms).build()),
-                          make_unique<RefSlab>(std::move(Refs).build()));
+    IndexedFileDigests[Path] = Hash;
+    IndexedSymbols.update(Path, std::move(SS), std::move(RS));
   }
 }
 
@@ -293,6 +323,18 @@ Error BackgroundIndex::index(tooling::Co
     if (IndexedFileDigests.lookup(AbsolutePath) == Hash) {
       vlog("No need to index {0}, already up to date", AbsolutePath);
       return Error::success();
+    } else if (IndexShardStorage) { // Check if shard storage has the index.
+      auto Shard = IndexShardStorage->retrieveShard(AbsolutePath, Hash);
+      if (Shard) {
+        // FIXME: We might still want to re-index headers.
+        IndexedFileDigests[AbsolutePath] = Hash;
+        IndexedSymbols.update(
+            AbsolutePath, make_unique<SymbolSlab>(std::move(*Shard->Symbols)),
+            make_unique<RefSlab>(std::move(*Shard->Refs)));
+
+        vlog("Loaded {0} from storage", AbsolutePath);
+        return Error::success();
+      }
     }
 
     DigestsSnapshot = IndexedFileDigests;
@@ -359,5 +401,59 @@ Error BackgroundIndex::index(tooling::Co
   return Error::success();
 }
 
+llvm::Expected<IndexFileIn>
+DiskShardStorage::retrieveShard(llvm::StringRef ShardIdentifier,
+                                FileDigest Hash) const {
+  assert(Initialized && "Not initialized?");
+  llvm::SmallString<128> ShardPath;
+  {
+    std::lock_guard<std::mutex> Lock(DiskShardRootMu);
+    ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier);
+  }
+  auto Buffer = MemoryBuffer::getFile(ShardPath);
+  if (!Buffer) {
+    elog("Couldn't retrieve {0}: {1}", ShardPath, Buffer.getError().message());
+    return llvm::make_error<llvm::StringError>(Buffer.getError());
+  }
+  // FIXME: Change readIndexFile to also look at Hash of the source that
+  // generated index and skip if there is a mismatch.
+  return readIndexFile(Buffer->get()->getBuffer());
+}
+
+bool DiskShardStorage::storeShard(llvm::StringRef ShardIdentifier,
+                                  IndexFileOut Shard) const {
+  assert(Initialized && "Not initialized?");
+  llvm::SmallString<128> ShardPath;
+  {
+    std::lock_guard<std::mutex> Lock(DiskShardRootMu);
+    ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier);
+  }
+  std::error_code EC;
+  llvm::raw_fd_ostream OS(ShardPath, EC);
+  if (EC) {
+    elog("Failed to open {0} for writing: {1}", ShardPath, EC.message());
+    return false;
+  }
+  OS << Shard;
+  return true;
+}
+
+bool DiskShardStorage::initialize(llvm::StringRef Directory) {
+  if (Initialized)
+    return true;
+  std::lock_guard<std::mutex> Lock(DiskShardRootMu);
+  DiskShardRoot = Directory;
+  sys::path::append(DiskShardRoot, ".clangd-index/");
+  if (!llvm::sys::fs::exists(DiskShardRoot)) {
+    std::error_code OK;
+    std::error_code EC = llvm::sys::fs::create_directory(DiskShardRoot);
+    if (EC != OK) {
+      elog("Failed to create {0}: {1}", DiskShardRoot, EC.message());
+      return Initialized = false;
+    }
+  }
+  return Initialized = true;
+}
+
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/clangd/index/Background.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.h?rev=346938&r1=346937&r2=346938&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.h (original)
+++ clang-tools-extra/trunk/clangd/index/Background.h Thu Nov 15 02:31:10 2018
@@ -14,6 +14,7 @@
 #include "FSProvider.h"
 #include "index/FileIndex.h"
 #include "index/Index.h"
+#include "index/Serialization.h"
 #include "clang/Tooling/CompilationDatabase.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/SHA1.h"
@@ -27,6 +28,17 @@
 namespace clang {
 namespace clangd {
 
+// Base class for Shard Storage operations. See DiskShardStorage for more info.
+class ShardStorage {
+public:
+  using FileDigest = decltype(llvm::SHA1::hash({}));
+  virtual bool storeShard(llvm::StringRef ShardIdentifier,
+                          IndexFileOut Shard) const = 0;
+  virtual llvm::Expected<IndexFileIn>
+  retrieveShard(llvm::StringRef ShardIdentifier, FileDigest Hash) const = 0;
+  virtual bool initialize(llvm::StringRef Directory) = 0;
+};
+
 // Builds an in-memory index by by running the static indexer action over
 // all commands in a compilation database. Indexing happens in the background.
 // FIXME: it should also persist its state on disk for fast start.
@@ -34,8 +46,9 @@ namespace clangd {
 class BackgroundIndex : public SwapIndex {
 public:
   // FIXME: resource-dir injection should be hoisted somewhere common.
-  BackgroundIndex(Context BackgroundContext, StringRef ResourceDir,
+  BackgroundIndex(Context BackgroundContext, llvm::StringRef ResourceDir,
                   const FileSystemProvider &, ArrayRef<std::string> URISchemes,
+                  std::unique_ptr<ShardStorage> IndexShardStorage = nullptr,
                   size_t ThreadPoolSize = llvm::hardware_concurrency());
   ~BackgroundIndex(); // Blocks while the current task finishes.
 
@@ -66,6 +79,7 @@ private:
   const FileSystemProvider &FSProvider;
   Context BackgroundContext;
   std::vector<std::string> URISchemes;
+  std::unique_ptr<ShardStorage> IndexShardStorage;
 
   // index state
   llvm::Error index(tooling::CompileCommand);
@@ -86,6 +100,30 @@ private:
   std::vector<std::thread> ThreadPool; // FIXME: Abstract this away.
 };
 
+// Handles storage and retrieval of index shards on disk. Requires initialize()
+// to be called before storing or retrieving. Creates a directory called
+// ".clangd-index/" under the path provided during initialize. This class is
+// thread-safe.
+class DiskShardStorage : public ShardStorage {
+  mutable std::mutex DiskShardRootMu;
+  llvm::SmallString<128> DiskShardRoot;
+  bool Initialized;
+
+public:
+  // Retrieves the shard if found and contents are consistent with the provided
+  // Hash.
+  llvm::Expected<IndexFileIn> retrieveShard(llvm::StringRef ShardIdentifier,
+                                            FileDigest Hash) const;
+
+  // Stores given shard with name ShardIdentifier under initialized directory.
+  bool storeShard(llvm::StringRef ShardIdentifier, IndexFileOut Shard) const;
+
+  // Initializes DiskShardRoot to (Directory + ".clangd-index/") which is the
+  // base directory for all shard files. After the initialization succeeds all
+  // subsequent calls are no-ops.
+  bool initialize(llvm::StringRef Directory);
+};
+
 } // namespace clangd
 } // namespace clang
 

Modified: clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp?rev=346938&r1=346937&r2=346938&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp Thu Nov 15 02:31:10 2018
@@ -78,5 +78,79 @@ TEST(BackgroundIndexTest, IndexTwoFiles)
                        FileURI("unittest:///root/B.cc")}));
 }
 
+TEST(BackgroundIndexTest, ShardStorageTest) {
+  class MemoryShardStorage : public ShardStorage {
+    mutable std::mutex StorageMu;
+    llvm::StringMap<std::string> &Storage;
+    size_t& CacheHits;
+
+  public:
+    MemoryShardStorage(llvm::StringMap<std::string> &Storage, size_t &CacheHits)
+        : Storage(Storage), CacheHits(CacheHits) {}
+
+    bool storeShard(llvm::StringRef ShardIdentifier, IndexFileOut Shard) const {
+      std::lock_guard<std::mutex> Lock(StorageMu);
+      std::string &str = Storage[ShardIdentifier];
+      llvm::raw_string_ostream OS(str);
+      OS << Shard;
+      OS.flush();
+      return true;
+    }
+    llvm::Expected<IndexFileIn> retrieveShard(llvm::StringRef ShardIdentifier,
+                                              FileDigest Hash) const {
+      std::lock_guard<std::mutex> Lock(StorageMu);
+      if (Storage.find(ShardIdentifier) == Storage.end())
+        return llvm::make_error<llvm::StringError>(
+            "Shard not found.", llvm::inconvertibleErrorCode());
+      auto IndexFile = readIndexFile(Storage[ShardIdentifier]);
+      if(!IndexFile)
+        return IndexFile;
+      CacheHits++;
+      return IndexFile;
+    }
+    bool initialize(llvm::StringRef Directory) { return true; }
+  };
+  MockFSProvider FS;
+  FS.Files[testPath("root/A.h")] = R"cpp(
+      void common();
+      void f_b();
+      class A_CC {};
+      )cpp";
+  FS.Files[testPath("root/A.cc")] =
+      "#include \"A.h\"\nvoid g() { (void)common; }";
+  llvm::StringMap<std::string> Storage;
+  size_t CacheHits = 0;
+  tooling::CompileCommand Cmd;
+  Cmd.Filename = testPath("root/A.cc");
+  Cmd.Directory = testPath("root");
+  Cmd.CommandLine = {"clang++", testPath("root/A.cc")};
+  {
+    BackgroundIndex Idx(
+        Context::empty(), "", FS, /*URISchemes=*/{"unittest"},
+        /*IndexShardStorage=*/
+        llvm::make_unique<MemoryShardStorage>(Storage, CacheHits));
+    Idx.enqueue(testPath("root"), Cmd);
+    Idx.blockUntilIdleForTest();
+  }
+  EXPECT_EQ(CacheHits, 0U);
+  EXPECT_EQ(Storage.size(), 2U);
+  EXPECT_NE(Storage.find(testPath("root/A.h")), Storage.end());
+  EXPECT_NE(Storage.find(testPath("root/A.cc")), Storage.end());
+
+  {
+    BackgroundIndex Idx(
+        Context::empty(), "", FS, /*URISchemes=*/{"unittest"},
+        /*IndexShardStorage=*/
+        llvm::make_unique<MemoryShardStorage>(Storage, CacheHits));
+    Idx.enqueue(testPath("root"), Cmd);
+    Idx.blockUntilIdleForTest();
+  }
+  EXPECT_EQ(CacheHits, 1U);
+  EXPECT_EQ(Storage.size(), 2U);
+  EXPECT_NE(Storage.find(testPath("root/A.h")), Storage.end());
+  EXPECT_NE(Storage.find(testPath("root/A.cc")), Storage.end());
+  // B_CC is dropped as we don't collect symbols from A.h in this compilation.
+}
+
 } // namespace clangd
 } // namespace clang




More information about the cfe-commits mailing list