[clang-tools-extra] r344513 - [clangd] Minimal implementation of automatic static index (not enabled).

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 15 06:34:11 PDT 2018


Author: sammccall
Date: Mon Oct 15 06:34:10 2018
New Revision: 344513

URL: http://llvm.org/viewvc/llvm-project?rev=344513&view=rev
Log:
[clangd] Minimal implementation of automatic static index (not enabled).

Summary:
See tinyurl.com/clangd-automatic-index for design and goals.

Lots of limitations to keep this patch smallish, TODOs everywhere:
 - no serialization to disk
 - no changes to dynamic index, which now has a much simpler job
 - no partitioning of symbols by file to avoid duplication of header symbols
 - no reindexing of edited files
 - only a single worker thread
 - compilation database is slurped synchronously (doesn't scale)
 - uses memindex, rebuilds after every file (should be dex, periodically)

It's not hooked up to ClangdServer/ClangdLSPServer yet: the layering
isn't clear (it should really be in ClangdServer, but ClangdLSPServer
has all the CDB interactions).

Reviewers: ioeric

Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, jfb, cfe-commits

Differential Revision: https://reviews.llvm.org/D53032

Added:
    clang-tools-extra/trunk/clangd/index/Background.cpp
    clang-tools-extra/trunk/clangd/index/Background.h
    clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp
Modified:
    clang-tools-extra/trunk/clangd/CMakeLists.txt
    clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
    clang-tools-extra/trunk/unittests/clangd/SyncAPI.cpp
    clang-tools-extra/trunk/unittests/clangd/SyncAPI.h

Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=344513&r1=344512&r2=344513&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt Mon Oct 15 06:34:10 2018
@@ -38,6 +38,7 @@ add_clang_library(clangDaemon
   URI.cpp
   XRefs.cpp
 
+  index/Background.cpp
   index/CanonicalIncludes.cpp
   index/FileIndex.cpp
   index/Index.cpp

Added: clang-tools-extra/trunk/clangd/index/Background.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.cpp?rev=344513&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.cpp (added)
+++ clang-tools-extra/trunk/clangd/index/Background.cpp Mon Oct 15 06:34:10 2018
@@ -0,0 +1,191 @@
+//===-- Background.cpp - Build an index in a background thread ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "index/Background.h"
+#include "ClangdUnit.h"
+#include "Compiler.h"
+#include "Logger.h"
+#include "Trace.h"
+#include "index/IndexAction.h"
+#include "index/MemIndex.h"
+#include "index/Serialization.h"
+#include "llvm/Support/SHA1.h"
+#include <random>
+
+using namespace llvm;
+namespace clang {
+namespace clangd {
+
+BackgroundIndex::BackgroundIndex(Context BackgroundContext,
+                                 StringRef ResourceDir,
+                                 const FileSystemProvider &FSProvider)
+    : SwapIndex(llvm::make_unique<MemIndex>()), ResourceDir(ResourceDir),
+      FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)),
+      Thread([this] { run(); }) {} // The worker thread executes run().
+
+BackgroundIndex::~BackgroundIndex() {
+  stop();        // Ask run() to return; tasks still queued are dropped.
+  Thread.join(); // Wait for the worker, including any task in progress.
+}
+
+void BackgroundIndex::stop() {
+  {
+    std::lock_guard<std::mutex> Lock(QueueMu); // Guards ShouldStop.
+    ShouldStop = true;
+  }
+  QueueCV.notify_all(); // Wake run() so that it observes ShouldStop.
+}
+
+void BackgroundIndex::run() { // Worker loop: runs queued tasks until stopped.
+  WithContext Background(std::move(BackgroundContext));
+  while (true) {
+    llvm::Optional<Task> Task;
+    {
+      std::unique_lock<std::mutex> Lock(QueueMu);
+      QueueCV.wait(Lock, [&] { return ShouldStop || !Queue.empty(); });
+      if (ShouldStop) { // Stopping wins even if tasks are still queued.
+        Queue.clear();  // Remaining tasks are discarded.
+        QueueCV.notify_all(); // Lets blockUntilIdleForTest() re-check.
+        return;
+      }
+      ++NumActiveTasks; // Counted under the same lock that dequeues the task.
+      Task = std::move(Queue.front());
+      Queue.pop_front();
+    }
+    (*Task)(); // Runs without holding QueueMu.
+    {
+      std::unique_lock<std::mutex> Lock(QueueMu);
+      assert(NumActiveTasks > 0 && "before decrementing");
+      --NumActiveTasks;
+    }
+    QueueCV.notify_all(); // The index may have just become idle.
+  }
+}
+
+void BackgroundIndex::blockUntilIdleForTest() { // Blocks until idle.
+  std::unique_lock<std::mutex> Lock(QueueMu);
+  QueueCV.wait(Lock, [&] { return Queue.empty() && NumActiveTasks == 0; });
+} // Idle means: nothing queued and no task currently running.
+
+// Queues a single compile command for background indexing and wakes the
+// worker thread. Directory is not used by this overload.
+void BackgroundIndex::enqueue(StringRef Directory,
+                              tooling::CompileCommand Cmd) {
+  {
+    std::lock_guard<std::mutex> Lock(QueueMu);
+    enqueueLocked(std::move(Cmd));
+  }
+  // run() sleeps on QueueCV while the queue is empty; without this signal a
+  // worker that is already waiting would never notice the new task.
+  QueueCV.notify_all();
+}
+
+void BackgroundIndex::enqueueAll(StringRef Directory,
+                                 const tooling::CompilationDatabase &CDB) {
+  // FIXME: reading every command up front is synchronous and may be slow.
+  // Perhaps a queued task should re-read the CDB from disk and enqueue them?
+  trace::Span EnqueueSpan("BackgroundIndexEnqueueCDB");
+  auto TUs = CDB.getAllCompileCommands();
+  SPAN_ATTACH(EnqueueSpan, "commands", int64_t(TUs.size()));
+  std::mt19937 Rng(std::random_device{}());
+  std::shuffle(TUs.begin(), TUs.end(), Rng); // Index the TUs in random order.
+  log("Enqueueing {0} commands for indexing from {1}", TUs.size(), Directory);
+  {
+    std::lock_guard<std::mutex> QueueLock(QueueMu); // One lock for the batch.
+    for (auto &TU : TUs)
+      enqueueLocked(std::move(TU));
+  }
+  QueueCV.notify_all(); // Wake the worker to pick up the new tasks.
+}
+
+void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) {
+  auto IndexOne = [this](tooling::CompileCommand Command) {
+    const std::string File = Command.Filename; // Kept for the failure log.
+    Command.CommandLine.push_back("-resource-dir=" + ResourceDir);
+    if (llvm::Error Err = index(std::move(Command)))
+      log("Indexing {0} failed: {1}", File, std::move(Err));
+  };
+  // Callers hold QueueMu. The command is bound into the task that is queued.
+  Queue.push_back(Bind(std::move(IndexOne), std::move(Cmd)));
+}
+
+// Indexes the single translation unit described by Cmd. The main file is
+// hashed first so that an unchanged file is skipped; otherwise the static
+// indexing action is run over it and the index is rebuilt from the result.
+// Returns an error if the file can't be read, or if clang can't be set up or
+// run on it.
+llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
+  trace::Span Tracer("BackgroundIndex");
+  SPAN_ATTACH(Tracer, "file", Cmd.Filename);
+  SmallString<128> AbsolutePath;
+  if (llvm::sys::path::is_absolute(Cmd.Filename)) {
+    AbsolutePath = Cmd.Filename;
+  } else {
+    AbsolutePath = Cmd.Directory;
+    llvm::sys::path::append(AbsolutePath, Cmd.Filename);
+  }
+
+  auto FS = FSProvider.getFileSystem();
+  auto Buf = FS->getBufferForFile(AbsolutePath);
+  if (!Buf)
+    return errorCodeToError(Buf.getError());
+  StringRef Contents = Buf->get()->getBuffer();
+  auto Hash = SHA1::hash({(const uint8_t *)Contents.data(), Contents.size()});
+
+  // Skip files whose contents are unchanged since they were last indexed.
+  if (FileHash.lookup(AbsolutePath) == Hash) {
+    vlog("No need to index {0}, already up to date", AbsolutePath);
+    return Error::success();
+  }
+
+  // The digest was passed to log() before but had no placeholder, so it was
+  // never printed.
+  log("Indexing {0} (digest:={1})", Cmd.Filename, toHex(Hash));
+  ParseInputs Inputs;
+  Inputs.FS = std::move(FS);
+  Inputs.FS->setCurrentWorkingDirectory(Cmd.Directory);
+  Inputs.CompileCommand = std::move(Cmd); // Cmd must not be used after this.
+  auto CI = buildCompilerInvocation(Inputs);
+  if (!CI)
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "Couldn't build compiler invocation");
+  IgnoreDiagnostics IgnoreDiags;
+  auto Clang = prepareCompilerInstance(
+      std::move(CI), /*Preamble=*/nullptr, std::move(*Buf),
+      std::make_shared<PCHContainerOperations>(), Inputs.FS, IgnoreDiags);
+  if (!Clang)
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "Couldn't build compiler instance");
+
+  SymbolCollector::Options IndexOpts;
+  SymbolSlab Symbols;
+  RefSlab Refs;
+  IndexFileIn IndexData;
+  // The action hands its results back through these two callbacks.
+  auto Action = createStaticIndexingAction(
+      IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); },
+      [&](RefSlab R) { Refs = std::move(R); });
+
+  // We're going to run clang here, and it could potentially crash.
+  // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
+  // but the leaky "recovery" is pretty scary too in a long-running process.
+  // If crashes are a real problem, maybe we should fork a child process.
+
+  const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front();
+  if (!Action->BeginSourceFile(*Clang, Input))
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "BeginSourceFile() failed");
+  if (!Action->Execute())
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "Execute() failed");
+  Action->EndSourceFile();
+
+  log("Indexed {0} ({1} symbols, {2} refs)", Inputs.CompileCommand.Filename,
+      Symbols.size(), Refs.size());
+  SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
+  SPAN_ATTACH(Tracer, "refs", int(Refs.size()));
+  // FIXME: partition the symbols by file rather than TU, to avoid duplication.
+  IndexedSymbols.update(AbsolutePath,
+                        llvm::make_unique<SymbolSlab>(std::move(Symbols)),
+                        llvm::make_unique<RefSlab>(std::move(Refs)));
+  FileHash[AbsolutePath] = Hash; // Remember the digest of what was indexed.
+
+  // FIXME: this should rebuild once-in-a-while, not after every file.
+  //       At that point we should use Dex, too.
+  vlog("Rebuilding automatic index");
+  reset(IndexedSymbols.buildMemIndex());
+  return Error::success();
+}
+
+} // namespace clangd
+} // namespace clang

Added: clang-tools-extra/trunk/clangd/index/Background.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.h?rev=344513&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.h (added)
+++ clang-tools-extra/trunk/clangd/index/Background.h Mon Oct 15 06:34:10 2018
@@ -0,0 +1,79 @@
+//===--- Background.h - Build an index in a background thread ----*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H
+
+#include "Context.h"
+#include "FSProvider.h"
+#include "index/FileIndex.h"
+#include "index/Index.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/Support/SHA1.h"
+#include <condition_variable>
+#include <deque>
+#include <thread>
+
+namespace clang {
+namespace clangd {
+
+// Builds an in-memory index by running the static indexer action over
+// all commands in a compilation database. Indexing happens in the background.
+// FIXME: it should also persist its state on disk for fast start.
+// FIXME: it should watch for changes to files on disk.
+class BackgroundIndex : public SwapIndex {
+public:
+  // FIXME: resource-dir injection should be hoisted somewhere common.
+  BackgroundIndex(Context BackgroundContext, StringRef ResourceDir,
+                  const FileSystemProvider &);
+  ~BackgroundIndex(); // Blocks while the current task finishes.
+
+  // Enqueue a translation unit for indexing.
+  // The indexing happens in a background thread, so the symbols will be
+  // available sometime later.
+  void enqueue(llvm::StringRef Directory, tooling::CompileCommand);
+  // Index all TUs described in the compilation database.
+  void enqueueAll(llvm::StringRef Directory,
+                  const tooling::CompilationDatabase &);
+
+  // Cause background threads to stop after their current task, any remaining
+  // tasks will be discarded.
+  void stop();
+
+  // Wait until the queue is empty, to allow deterministic testing.
+  void blockUntilIdleForTest();
+
+private:
+  // configuration
+  std::string ResourceDir;
+  const FileSystemProvider &FSProvider;
+  Context BackgroundContext;
+
+  // index state
+  llvm::Error index(tooling::CompileCommand);
+  FileSymbols IndexedSymbols; // Index contents.
+  using Hash = decltype(llvm::SHA1::hash({}));
+  llvm::StringMap<Hash> FileHash; // Digest of indexed file.
+
+  // queue management
+  using Task = std::function<void()>; // FIXME: use multiple worker threads.
+  void run(); // Main loop executed by Thread. Runs tasks from Queue.
+  void enqueueLocked(tooling::CompileCommand Cmd);
+  std::mutex QueueMu;
+  unsigned NumActiveTasks = 0; // Only idle when queue is empty *and* no tasks.
+  std::condition_variable QueueCV;
+  bool ShouldStop = false;
+  std::deque<Task> Queue;
+  std::thread Thread; // Must be last: run() uses the members above.
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif

Added: clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp?rev=344513&view=auto
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp (added)
+++ clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp Mon Oct 15 06:34:10 2018
@@ -0,0 +1,37 @@
+#include "SyncAPI.h"
+#include "TestFS.h"
+#include "index/Background.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::UnorderedElementsAre;
+
+namespace clang {
+namespace clangd {
+
+MATCHER_P(Named, N, "") { return arg.Name == N; } // Matches on the Name field.
+
+TEST(BackgroundIndexTest, IndexTwoFiles) {
+  MockFSProvider MockFS;
+  // A.h declares a different symbol depending on how NAME is defined by the
+  // including TU. Symbols are stored per TU for now, so both must show up.
+  MockFS.Files[testPath("root/A.h")] = "void a_h(); void NAME(){}";
+  MockFS.Files[testPath("root/A.cc")] = "#include \"A.h\"";
+  MockFS.Files[testPath("root/B.cc")] = "#define NAME bar\n#include \"A.h\"";
+  BackgroundIndex BGIndex(Context::empty(), "", MockFS);
+
+  tooling::CompileCommand TU; // Reused for both files; only the path changes.
+  TU.Filename = testPath("root/A.cc");
+  TU.Directory = testPath("root");
+  TU.CommandLine = {"clang++", "-DNAME=foo", testPath("root/A.cc")};
+  BGIndex.enqueue(testPath("root"), TU);
+  TU.CommandLine.back() = TU.Filename = testPath("root/B.cc");
+  BGIndex.enqueue(testPath("root"), TU);
+
+  BGIndex.blockUntilIdleForTest(); // Both TUs are indexed past this point.
+  EXPECT_THAT(runFuzzyFind(BGIndex, ""),
+              UnorderedElementsAre(Named("a_h"), Named("foo"), Named("bar")));
+}
+
+} // namespace clangd
+} // namespace clang

Modified: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt?rev=344513&r1=344512&r2=344513&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt Mon Oct 15 06:34:10 2018
@@ -10,6 +10,7 @@ include_directories(
 
 add_extra_unittest(ClangdTests
   Annotations.cpp
+  BackgroundIndexTests.cpp
   CancellationTests.cpp
   ClangdTests.cpp
   ClangdUnitTests.cpp

Modified: clang-tools-extra/trunk/unittests/clangd/SyncAPI.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SyncAPI.cpp?rev=344513&r1=344512&r2=344513&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SyncAPI.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/SyncAPI.cpp Mon Oct 15 06:34:10 2018
@@ -125,5 +125,17 @@ runDocumentSymbols(ClangdServer &Server,
   return std::move(*Result);
 }
 
+SymbolSlab runFuzzyFind(const SymbolIndex &Index, StringRef Query) {
+  FuzzyFindRequest Request; // Only the query text is set explicitly.
+  Request.Query = Query;
+  return runFuzzyFind(Index, Request);
+}
+
+SymbolSlab runFuzzyFind(const SymbolIndex &Index, const FuzzyFindRequest &Req) {
+  SymbolSlab::Builder Matches; // Collects every symbol passed to the callback.
+  Index.fuzzyFind(Req, [&Matches](const Symbol &S) { Matches.insert(S); });
+  return std::move(Matches).build();
+}
+
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/unittests/clangd/SyncAPI.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SyncAPI.h?rev=344513&r1=344512&r2=344513&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SyncAPI.h (original)
+++ clang-tools-extra/trunk/unittests/clangd/SyncAPI.h Mon Oct 15 06:34:10 2018
@@ -17,7 +17,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_SYNCAPI_H
 
 #include "ClangdServer.h"
-#include <future>
+#include "index/Index.h"
 
 namespace clang {
 namespace clangd {
@@ -50,6 +50,9 @@ runWorkspaceSymbols(ClangdServer &Server
 llvm::Expected<std::vector<SymbolInformation>>
 runDocumentSymbols(ClangdServer &Server, PathRef File);
 
+SymbolSlab runFuzzyFind(const SymbolIndex &Index, StringRef Query);
+SymbolSlab runFuzzyFind(const SymbolIndex &Index, const FuzzyFindRequest &Req);
+
 } // namespace clangd
 } // namespace clang
 




More information about the cfe-commits mailing list