[clang-tools-extra] r347038 - Introduce shard storage to auto-index.
Kadir Cetinkaya via cfe-commits
cfe-commits at lists.llvm.org
Fri Nov 16 01:03:56 PST 2018
Author: kadircet
Date: Fri Nov 16 01:03:56 2018
New Revision: 347038
URL: http://llvm.org/viewvc/llvm-project?rev=347038&view=rev
Log:
Introduce shard storage to auto-index.
Reviewers: sammccall, ioeric
Reviewed By: sammccall
Subscribers: llvm-commits, mgorny, Eugene.Zelenko, ilya-biryukov, jkorous, arphaman, cfe-commits
Differential Revision: https://reviews.llvm.org/D54269
Added:
clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp
Modified:
clang-tools-extra/trunk/clangd/CMakeLists.txt
clang-tools-extra/trunk/clangd/index/Background.cpp
clang-tools-extra/trunk/clangd/index/Background.h
clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp
Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=347038&r1=347037&r2=347038&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt Fri Nov 16 01:03:56 2018
@@ -38,6 +38,7 @@ add_clang_library(clangDaemon
XRefs.cpp
index/Background.cpp
+ index/BackgroundIndexStorage.cpp
index/CanonicalIncludes.cpp
index/FileIndex.cpp
index/Index.cpp
Modified: clang-tools-extra/trunk/clangd/index/Background.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.cpp?rev=347038&r1=347037&r2=347038&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Background.cpp Fri Nov 16 01:03:56 2018
@@ -24,6 +24,9 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SHA1.h"
+
+#include <memory>
+#include <queue>
#include <random>
#include <string>
@@ -31,21 +34,22 @@ using namespace llvm;
namespace clang {
namespace clangd {
-BackgroundIndex::BackgroundIndex(Context BackgroundContext,
- StringRef ResourceDir,
- const FileSystemProvider &FSProvider,
- ArrayRef<std::string> URISchemes,
- size_t ThreadPoolSize)
+// Takes ownership of the storage factory and starts ThreadPoolSize worker
+// threads, each running run() at low priority.
+BackgroundIndex::BackgroundIndex(
+ Context BackgroundContext, StringRef ResourceDir,
+ const FileSystemProvider &FSProvider, ArrayRef<std::string> URISchemes,
+ BackgroundIndexStorage::Factory IndexStorageFactory, size_t ThreadPoolSize)
: SwapIndex(make_unique<MemIndex>()), ResourceDir(ResourceDir),
FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)),
- URISchemes(URISchemes) {
+ URISchemes(URISchemes),
+ IndexStorageFactory(std::move(IndexStorageFactory)) {
assert(ThreadPoolSize > 0 && "Thread pool size can't be zero.");
+ // The parameter shadows the member and has already been moved from in the
+ // initializer list above, so the check must go through the member.
+ assert(this->IndexStorageFactory && "Storage factory can not be null!");
while (ThreadPoolSize--) {
ThreadPool.emplace_back([this] { run(); });
// Set priority to low, since background indexing is a long running task we
// do not want to eat up cpu when there are any other high priority threads.
// FIXME: In the future we might want a more general way of handling this to
- // support a tasks with various priorities.
+ // support tasks with various priorities.
setThreadPriority(ThreadPool.back(), ThreadPriority::Low);
}
}
@@ -97,9 +101,10 @@ void BackgroundIndex::blockUntilIdleForT
void BackgroundIndex::enqueue(StringRef Directory,
tooling::CompileCommand Cmd) {
+ BackgroundIndexStorage *IndexStorage = IndexStorageFactory(Directory);
{
std::lock_guard<std::mutex> Lock(QueueMu);
- enqueueLocked(std::move(Cmd));
+ enqueueLocked(std::move(Cmd), IndexStorage);
}
QueueCV.notify_all();
}
@@ -110,6 +115,7 @@ void BackgroundIndex::enqueueAll(StringR
// FIXME: this function may be slow. Perhaps enqueue a task to re-read the CDB
// from disk and enqueue the commands asynchronously?
auto Cmds = CDB.getAllCompileCommands();
+ BackgroundIndexStorage *IndexStorage = IndexStorageFactory(Directory);
SPAN_ATTACH(Tracer, "commands", int64_t(Cmds.size()));
std::mt19937 Generator(std::random_device{}());
std::shuffle(Cmds.begin(), Cmds.end(), Generator);
@@ -117,17 +123,18 @@ void BackgroundIndex::enqueueAll(StringR
{
std::lock_guard<std::mutex> Lock(QueueMu);
for (auto &Cmd : Cmds)
- enqueueLocked(std::move(Cmd));
+ enqueueLocked(std::move(Cmd), IndexStorage);
}
QueueCV.notify_all();
}
-void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) {
+void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd,
+ BackgroundIndexStorage *IndexStorage) {
Queue.push_back(Bind(
- [this](tooling::CompileCommand Cmd) {
+ [this, IndexStorage](tooling::CompileCommand Cmd) {
std::string Filename = Cmd.Filename;
Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir);
- if (auto Error = index(std::move(Cmd)))
+ if (auto Error = index(std::move(Cmd), IndexStorage))
log("Indexing {0} failed: {1}", Filename, std::move(Error));
},
std::move(Cmd)));
@@ -179,7 +186,8 @@ private:
/// Given index results from a TU, only update files in \p FilesToUpdate.
void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
RefSlab Refs,
- const StringMap<FileDigest> &FilesToUpdate) {
+ const StringMap<FileDigest> &FilesToUpdate,
+ BackgroundIndexStorage *IndexStorage) {
// Partition symbols/references into files.
struct File {
DenseSet<const Symbol *> Symbols;
@@ -227,20 +235,35 @@ void BackgroundIndex::update(StringRef M
for (const auto *R : F.second.Refs)
Refs.insert(RefToIDs[R], *R);
+ auto SS = llvm::make_unique<SymbolSlab>(std::move(Syms).build());
+ auto RS = llvm::make_unique<RefSlab>(std::move(Refs).build());
+
+ auto Hash = FilesToUpdate.lookup(Path);
+ // We need to store shards before updating the index, since the latter
+ // consumes slabs.
+ // FIXME: Store Hash in the Shard.
+ if (IndexStorage) {
+ IndexFileOut Shard;
+ Shard.Symbols = SS.get();
+ Shard.Refs = RS.get();
+ if (auto Error = IndexStorage->storeShard(Path, Shard))
+ elog("Failed to write background-index shard for file {0}: {1}", Path,
+ std::move(Error));
+ }
+
std::lock_guard<std::mutex> Lock(DigestsMu);
// This can override a newer version that is added in another thread,
// if this thread sees the older version but finishes later. This should be
// rare in practice.
- IndexedFileDigests[Path] = FilesToUpdate.lookup(Path);
- IndexedSymbols.update(Path,
- make_unique<SymbolSlab>(std::move(Syms).build()),
- make_unique<RefSlab>(std::move(Refs).build()));
+ IndexedFileDigests[Path] = Hash;
+ IndexedSymbols.update(Path, std::move(SS), std::move(RS));
}
}
// Creates a filter to not collect index results from files with unchanged
// digests.
-// \p FileDigests contains file digests for the current indexed files, and all changed files will be added to \p FilesToUpdate.
+// \p FileDigests contains file digests for the current indexed files, and all
+// changed files will be added to \p FilesToUpdate.
decltype(SymbolCollector::Options::FileFilter) createFileFilter(
const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests,
llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) {
@@ -269,7 +292,8 @@ decltype(SymbolCollector::Options::FileF
};
}
-Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
+Error BackgroundIndex::index(tooling::CompileCommand Cmd,
+ BackgroundIndexStorage *IndexStorage) {
trace::Span Tracer("BackgroundIndex");
SPAN_ATTACH(Tracer, "file", Cmd.Filename);
SmallString<128> AbsolutePath;
@@ -342,7 +366,8 @@ Error BackgroundIndex::index(tooling::Co
Symbols.size(), Refs.numRefs());
SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
- update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate);
+ update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate,
+ IndexStorage);
{
// Make sure hash for the main file is always updated even if there is no
// index data in it.
Modified: clang-tools-extra/trunk/clangd/index/Background.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.h?rev=347038&r1=347037&r2=347038&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.h (original)
+++ clang-tools-extra/trunk/clangd/index/Background.h Fri Nov 16 01:03:56 2018
@@ -14,6 +14,7 @@
#include "FSProvider.h"
#include "index/FileIndex.h"
#include "index/Index.h"
+#include "index/Serialization.h"
#include "clang/Tooling/CompilationDatabase.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/SHA1.h"
@@ -27,6 +28,31 @@
namespace clang {
namespace clangd {
+// Handles storage and retrieval of index shards. Both store and load
+// operations can be called from multiple-threads concurrently.
+class BackgroundIndexStorage {
+public:
+  // Implementations are owned and destroyed through pointers to this base
+  // class, so the destructor has to be virtual.
+  virtual ~BackgroundIndexStorage() = default;
+
+  // Shards of the index are stored and retrieved independently, keyed by shard
+  // identifier - in practice this is a source file name
+  virtual llvm::Error storeShard(llvm::StringRef ShardIdentifier,
+                                 IndexFileOut Shard) const = 0;
+
+  // Tries to load shard with given identifier, returns nullptr if shard
+  // couldn't be loaded.
+  virtual std::unique_ptr<IndexFileIn>
+  loadShard(llvm::StringRef ShardIdentifier) const = 0;
+
+  // The factory provides storage for each CDB.
+  // It keeps ownership of the storage instances, and should manage caching
+  // itself. Factory must be threadsafe and never returns nullptr.
+  using Factory =
+      llvm::unique_function<BackgroundIndexStorage *(llvm::StringRef)>;
+
+  // Creates an Index Storage that saves shards into disk. Index storage uses
+  // CDBDirectory + ".clangd-index/" as the folder to save shards.
+  static Factory createDiskBackedStorageFactory();
+};
+
// Builds an in-memory index by running the static indexer action over
// all commands in a compilation database. Indexing happens in the background.
// FIXME: it should also persist its state on disk for fast start.
@@ -34,8 +60,9 @@ namespace clangd {
class BackgroundIndex : public SwapIndex {
public:
// FIXME: resource-dir injection should be hoisted somewhere common.
- BackgroundIndex(Context BackgroundContext, StringRef ResourceDir,
+ BackgroundIndex(Context BackgroundContext, llvm::StringRef ResourceDir,
const FileSystemProvider &, ArrayRef<std::string> URISchemes,
+ BackgroundIndexStorage::Factory IndexStorageFactory,
size_t ThreadPoolSize = llvm::hardware_concurrency());
~BackgroundIndex(); // Blocks while the current task finishes.
@@ -59,7 +86,8 @@ public:
private:
/// Given index results from a TU, only update files in \p FilesToUpdate.
void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs,
- const llvm::StringMap<FileDigest> &FilesToUpdate);
+ const llvm::StringMap<FileDigest> &FilesToUpdate,
+ BackgroundIndexStorage *IndexStorage);
// configuration
std::string ResourceDir;
@@ -68,16 +96,20 @@ private:
std::vector<std::string> URISchemes;
// index state
- llvm::Error index(tooling::CompileCommand);
+ llvm::Error index(tooling::CompileCommand,
+ BackgroundIndexStorage *IndexStorage);
FileSymbols IndexedSymbols;
llvm::StringMap<FileDigest> IndexedFileDigests; // Key is absolute file path.
std::mutex DigestsMu;
+ BackgroundIndexStorage::Factory IndexStorageFactory;
+
// queue management
using Task = std::function<void()>;
void run(); // Main loop executed by Thread. Runs tasks from Queue.
- void enqueueLocked(tooling::CompileCommand Cmd);
+ void enqueueLocked(tooling::CompileCommand Cmd,
+ BackgroundIndexStorage *IndexStorage);
std::mutex QueueMu;
unsigned NumActiveTasks = 0; // Only idle when queue is empty *and* no tasks.
std::condition_variable QueueCV;
Added: clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp?rev=347038&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp (added)
+++ clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp Fri Nov 16 01:03:56 2018
@@ -0,0 +1,112 @@
+//== BackgroundIndexStorage.cpp - Provide caching support to BackgroundIndex ==/
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Logger.h"
+#include "index/Background.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SHA1.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+using FileDigest = decltype(llvm::SHA1::hash({}));
+
+// Computes the SHA1 digest of the bytes of \p Content.
+static FileDigest digest(StringRef Content) {
+  const auto *Bytes = reinterpret_cast<const uint8_t *>(Content.data());
+  return llvm::SHA1::hash(llvm::ArrayRef<uint8_t>(Bytes, Content.size()));
+}
+
+// Returns the on-disk location of the shard for \p FilePath, i.e.
+// <ShardRoot>/<basename of FilePath>.<hex SHA1 of FilePath>.idx
+// Hashing the full path keeps files that share a basename apart.
+std::string getShardPathFromFilePath(llvm::StringRef ShardRoot,
+                                     llvm::StringRef FilePath) {
+  llvm::SmallString<128> ShardRootSS(ShardRoot);
+  llvm::sys::path::append(ShardRootSS, llvm::sys::path::filename(FilePath) +
+                                           "." + llvm::toHex(digest(FilePath)) +
+                                           ".idx");
+  // Return the joined path; returning ShardRoot here would hand back the bare
+  // directory and drop the file name that was just appended.
+  return ShardRootSS.str().str();
+}
+
+// Uses disk as a storage for index shards. Creates a directory called
+// ".clangd-index/" under the path provided during construction.
+class DiskBackedIndexStorage : public BackgroundIndexStorage {
+  std::string DiskShardRoot;
+
+public:
+  // Sets DiskShardRoot to (Directory + ".clangd-index/") which is the base
+  // directory for all shard files. A failure to create the directory is only
+  // logged; construction still completes.
+  explicit DiskBackedIndexStorage(llvm::StringRef Directory) {
+    llvm::SmallString<128> CDBDirectory(Directory);
+    llvm::sys::path::append(CDBDirectory, ".clangd-index/");
+    DiskShardRoot = CDBDirectory.str();
+    if (std::error_code EC = llvm::sys::fs::create_directory(DiskShardRoot))
+      elog("Failed to create directory {0} for index storage: {1}",
+           DiskShardRoot, EC.message());
+  }
+
+  // Reads and parses the shard file for \p ShardIdentifier. Returns nullptr
+  // when the file cannot be read, or when parsing fails (which is logged).
+  std::unique_ptr<IndexFileIn>
+  loadShard(llvm::StringRef ShardIdentifier) const override {
+    const std::string ShardPath =
+        getShardPathFromFilePath(DiskShardRoot, ShardIdentifier);
+    auto Buffer = llvm::MemoryBuffer::getFile(ShardPath);
+    if (!Buffer)
+      return nullptr;
+    auto I = readIndexFile(Buffer->get()->getBuffer());
+    if (!I) {
+      elog("Error while reading shard {0}: {1}", ShardIdentifier,
+           I.takeError());
+      return nullptr;
+    }
+    return llvm::make_unique<IndexFileIn>(std::move(*I));
+  }
+
+  // Serializes \p Shard into the shard file for \p ShardIdentifier. Returns
+  // an error if the file cannot be opened or the stream reports a failure.
+  llvm::Error storeShard(llvm::StringRef ShardIdentifier,
+                         IndexFileOut Shard) const override {
+    auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier);
+    std::error_code EC;
+    llvm::raw_fd_ostream OS(ShardPath, EC);
+    if (EC)
+      return llvm::errorCodeToError(EC);
+    OS << Shard;
+    // Close explicitly so that any pending write error shows up in OS.error().
+    OS.close();
+    return llvm::errorCodeToError(OS.error());
+  }
+};
+
+// Creates and owns IndexStorages for multiple CDBs.
+class DiskBackedIndexStorageManager {
+public:
+  // The mutex is held through a unique_ptr and must be allocated here;
+  // otherwise operator() would dereference a null pointer when locking.
+  // NOTE(review): the indirection presumably exists to keep this class movable
+  // so it can be stored in BackgroundIndexStorage::Factory - confirm.
+  DiskBackedIndexStorageManager()
+      : IndexStorageMapMu(llvm::make_unique<std::mutex>()) {}
+
+  // Creates the storage for the specified CDB, or fetches it from the cache.
+  // The returned pointer stays owned by this manager.
+  BackgroundIndexStorage *operator()(llvm::StringRef CDBDirectory) {
+    std::lock_guard<std::mutex> Lock(*IndexStorageMapMu);
+    auto &IndexStorage = IndexStorageMap[CDBDirectory];
+    if (!IndexStorage)
+      IndexStorage = llvm::make_unique<DiskBackedIndexStorage>(CDBDirectory);
+    return IndexStorage.get();
+  }
+
+  // Creates the storage for the specified CDB, or fetches it from the cache.
+  // NOTE(review): declared only; no definition is visible in this file.
+  BackgroundIndexStorage *createStorage(llvm::StringRef CDBDirectory);
+
+private:
+  llvm::StringMap<std::unique_ptr<BackgroundIndexStorage>> IndexStorageMap;
+  std::unique_ptr<std::mutex> IndexStorageMapMu;
+};
+
+} // namespace
+
+BackgroundIndexStorage::Factory
+BackgroundIndexStorage::createDiskBackedStorageFactory() {
+ return DiskBackedIndexStorageManager();
+}
+
+} // namespace clangd
+} // namespace clang
Modified: clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp?rev=347038&r1=347037&r2=347038&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp Fri Nov 16 01:03:56 2018
@@ -1,6 +1,7 @@
#include "SyncAPI.h"
#include "TestFS.h"
#include "index/Background.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -24,6 +25,37 @@ RefsAre(std::vector<testing::Matcher<Ref
return ElementsAre(testing::Pair(_, UnorderedElementsAreArray(Matchers)));
}
+// In-memory BackgroundIndexStorage for tests: serialized shards are kept in a
+// caller-owned map, and every successful load bumps a caller-owned counter.
+class MemoryShardStorage : public BackgroundIndexStorage {
+  mutable std::mutex StorageMu;
+  llvm::StringMap<std::string> &Storage;
+  size_t &CacheHits;
+
+public:
+  MemoryShardStorage(llvm::StringMap<std::string> &Storage, size_t &CacheHits)
+      : Storage(Storage), CacheHits(CacheHits) {}
+
+  // Serializes the shard to a string and stores it under its identifier.
+  llvm::Error storeShard(llvm::StringRef ShardIdentifier,
+                         IndexFileOut Shard) const override {
+    std::lock_guard<std::mutex> Lock(StorageMu);
+    Storage[ShardIdentifier] = llvm::to_string(Shard);
+    return llvm::Error::success();
+  }
+
+  // Returns the parsed shard, or nullptr if it is absent or fails to parse
+  // (the latter also records a test failure).
+  std::unique_ptr<IndexFileIn>
+  loadShard(llvm::StringRef ShardIdentifier) const override {
+    std::lock_guard<std::mutex> Lock(StorageMu);
+    auto Entry = Storage.find(ShardIdentifier);
+    if (Entry == Storage.end())
+      return nullptr;
+    auto IndexFile = readIndexFile(Entry->second);
+    if (!IndexFile) {
+      ADD_FAILURE() << "Error while reading " << ShardIdentifier << ':'
+                    << IndexFile.takeError();
+      return nullptr;
+    }
+    ++CacheHits;
+    return llvm::make_unique<IndexFileIn>(std::move(*IndexFile));
+  }
+};
+
TEST(BackgroundIndexTest, IndexTwoFiles) {
MockFSProvider FS;
// a.h yields different symbols when included by A.cc vs B.cc.
@@ -45,7 +77,11 @@ TEST(BackgroundIndexTest, IndexTwoFiles)
void f_b() {
(void)common;
})cpp";
- BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"});
+ llvm::StringMap<std::string> Storage;
+ size_t CacheHits = 0;
+ MemoryShardStorage MSS(Storage, CacheHits);
+ BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"},
+ [&](llvm::StringRef) { return &MSS; });
tooling::CompileCommand Cmd;
Cmd.Filename = testPath("root/A.cc");
@@ -78,5 +114,49 @@ TEST(BackgroundIndexTest, IndexTwoFiles)
FileURI("unittest:///root/B.cc")}));
}
+// Indexes A.cc (which includes A.h) with an empty storage and verifies that a
+// shard is written for each of the two files without any shard being loaded.
+TEST(BackgroundIndexTest, ShardStorageWriteTest) {
+  MockFSProvider FS;
+  FS.Files[testPath("root/A.h")] = R"cpp(
+ void common();
+ void f_b();
+ class A_CC {};
+ )cpp";
+  FS.Files[testPath("root/A.cc")] =
+      "#include \"A.h\"\nvoid g() { (void)common; }";
+
+  llvm::StringMap<std::string> Storage;
+  size_t CacheHits = 0;
+  MemoryShardStorage MSS(Storage, CacheHits);
+
+  tooling::CompileCommand Cmd;
+  Cmd.Filename = testPath("root/A.cc");
+  Cmd.Directory = testPath("root");
+  Cmd.CommandLine = {"clang++", testPath("root/A.cc")};
+  // Check nothing is loaded from Storage, but A.cc and A.h have been stored.
+  {
+    BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"},
+                        [&](llvm::StringRef) { return &MSS; });
+    Idx.enqueue(testPath("root"), Cmd);
+    Idx.blockUntilIdleForTest();
+  }
+  EXPECT_EQ(CacheHits, 0U);
+  EXPECT_EQ(Storage.size(), 2U);
+
+  // ASSERT rather than EXPECT: the shard is dereferenced right below, so a
+  // missing shard must stop the test instead of crashing it.
+  auto ShardHeader = MSS.loadShard(testPath("root/A.h"));
+  ASSERT_NE(ShardHeader, nullptr);
+  EXPECT_THAT(
+      *ShardHeader->Symbols,
+      UnorderedElementsAre(Named("common"), Named("A_CC"),
+                           AllOf(Named("f_b"), Declared(), Not(Defined()))));
+  for (const auto &Ref : *ShardHeader->Refs)
+    EXPECT_THAT(Ref.second,
+                UnorderedElementsAre(FileURI("unittest:///root/A.h")));
+
+  auto ShardSource = MSS.loadShard(testPath("root/A.cc"));
+  ASSERT_NE(ShardSource, nullptr);
+  EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre());
+  EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")}));
+}
+
} // namespace clangd
} // namespace clang
More information about the cfe-commits
mailing list