[clang-tools-extra] r343759 - [clangd] clangd-indexer: Drop support for MR-via-YAML

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 4 01:30:04 PDT 2018


Author: sammccall
Date: Thu Oct  4 01:30:03 2018
New Revision: 343759

URL: http://llvm.org/viewvc/llvm-project?rev=343759&view=rev
Log:
[clangd] clangd-indexer: Drop support for MR-via-YAML

Summary:
It's slow, and the open-source reduce implementation doesn't scale properly.
While here, tidy up some dead headers and comments.

Reviewers: kadircet

Subscribers: ilya-biryukov, ioeric, MaskRay, jkorous, arphaman, cfe-commits

Differential Revision: https://reviews.llvm.org/D52517

Modified:
    clang-tools-extra/trunk/clangd/index/Serialization.h
    clang-tools-extra/trunk/clangd/index/YAMLSerialization.cpp
    clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp

Modified: clang-tools-extra/trunk/clangd/index/Serialization.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.h?rev=343759&r1=343758&r2=343759&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Serialization.h (original)
+++ clang-tools-extra/trunk/clangd/index/Serialization.h Thu Oct  4 01:30:03 2018
@@ -27,11 +27,6 @@
 #include "Index.h"
 #include "llvm/Support/Error.h"
 
-namespace llvm {
-namespace yaml {
-class Input;
-}
-} // namespace llvm
 namespace clang {
 namespace clangd {
 
@@ -61,10 +56,8 @@ struct IndexFileOut {
 // Serializes an index file.
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
 
+// Convert a single symbol to YAML, a nice debug representation.
 std::string toYAML(const Symbol &);
-// Returned symbol is backed by the YAML input.
-// FIXME: this is only needed for IndexerMain, find a better solution.
-llvm::Expected<Symbol> symbolFromYAML(llvm::yaml::Input &);
 
 // Build an in-memory static index from an index file.
 // The size should be relatively small, so data can be managed in memory.

Modified: clang-tools-extra/trunk/clangd/index/YAMLSerialization.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/YAMLSerialization.cpp?rev=343759&r1=343758&r2=343759&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/YAMLSerialization.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/YAMLSerialization.cpp Thu Oct  4 01:30:03 2018
@@ -218,13 +218,5 @@ std::string toYAML(const Symbol &S) {
   return Buf;
 }
 
-Expected<Symbol> symbolFromYAML(llvm::yaml::Input &Yin) {
-  Symbol S;
-  Yin >> S;
-  if (Yin.error())
-    return llvm::errorCodeToError(Yin.error());
-  return S;
-}
-
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp?rev=343759&r1=343758&r2=343759&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp (original)
+++ clang-tools-extra/trunk/clangd/indexer/IndexerMain.cpp Thu Oct  4 01:30:03 2018
@@ -12,26 +12,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "RIFF.h"
-#include "index/CanonicalIncludes.h"
 #include "index/Index.h"
 #include "index/IndexAction.h"
 #include "index/Merge.h"
 #include "index/Serialization.h"
 #include "index/SymbolCollector.h"
-#include "clang/Frontend/CompilerInstance.h"
-#include "clang/Frontend/FrontendActions.h"
-#include "clang/Index/IndexDataConsumer.h"
-#include "clang/Index/IndexingAction.h"
 #include "clang/Tooling/CommonOptionsParser.h"
 #include "clang/Tooling/Execution.h"
 #include "clang/Tooling/Tooling.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Signals.h"
-#include "llvm/Support/ThreadPool.h"
-#include "llvm/Support/YAMLTraits.h"
 
 using namespace llvm;
 using namespace clang::tooling;
@@ -50,16 +41,6 @@ static llvm::cl::opt<std::string> Assume
                    "not given, such headers will have relative paths."),
     llvm::cl::init(""));
 
-static llvm::cl::opt<bool> MergeOnTheFly(
-    "merge-on-the-fly",
-    llvm::cl::desc(
-        "Merges symbols for each processed translation unit as soon "
-        "they become available. This results in a smaller memory "
-        "usage and an almost instant reduce stage. Optimal for running as a "
-        "standalone tool, but cannot be used with multi-process executors like "
-        "MapReduce."),
-    llvm::cl::init(true), llvm::cl::Hidden);
-
 static llvm::cl::opt<IndexFileFormat>
     Format("format", llvm::cl::desc("Format of the index to be written"),
            llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml",
@@ -68,89 +49,36 @@ static llvm::cl::opt<IndexFileFormat>
                                        "binary RIFF format")),
            llvm::cl::init(IndexFileFormat::YAML));
 
-/// Responsible for aggregating symbols from each processed file and producing
-/// the final results. All methods in this class must be thread-safe,
-/// 'consumeSymbols' may be called from multiple threads.
-class SymbolsConsumer {
-public:
-  virtual ~SymbolsConsumer() = default;
-
-  /// Consume a SymbolSlab build for a file.
-  virtual void consumeSymbols(SymbolSlab Symbols) = 0;
-  /// Produce a resulting symbol slab, by combining  occurrences of the same
-  /// symbols across translation units.
-  virtual SymbolSlab mergeResults() = 0;
-};
-
-class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
+class IndexActionFactory : public tooling::FrontendActionFactory {
 public:
-  SymbolIndexActionFactory(SymbolsConsumer &Consumer) : Consumer(Consumer) {}
+  IndexActionFactory(IndexFileIn &Result) : Result(Result) {}
 
   clang::FrontendAction *create() override {
-    auto CollectorOpts = SymbolCollector::Options();
-    CollectorOpts.FallbackDir = AssumedHeaderDir;
+    SymbolCollector::Options Opts;
+    Opts.FallbackDir = AssumedHeaderDir;
     return createStaticIndexingAction(
-               CollectorOpts,
-               [&](SymbolSlab S) { Consumer.consumeSymbols(std::move(S)); })
+               Opts,
+               [&](SymbolSlab S) {
+                 // Merge as we go.
+                 std::lock_guard<std::mutex> Lock(SymbolsMu);
+                 for (const auto &Sym : S) {
+                   if (const auto *Existing = Symbols.find(Sym.ID))
+                     Symbols.insert(mergeSymbol(*Existing, Sym));
+                   else
+                     Symbols.insert(Sym);
+                 }
+               })
         .release();
   }
 
-  SymbolsConsumer &Consumer;
-};
-
-/// Stashes per-file results inside ExecutionContext, merges all of them at the
-/// end. Useful for running on MapReduce infrastructure to avoid keeping symbols
-/// from multiple files in memory.
-class ToolExecutorConsumer : public SymbolsConsumer {
-public:
-  ToolExecutorConsumer(ToolExecutor &Executor) : Executor(Executor) {}
-
-  void consumeSymbols(SymbolSlab Symbols) override {
-    for (const auto &Sym : Symbols)
-      Executor.getExecutionContext()->reportResult(Sym.ID.str(), toYAML(Sym));
-  }
-
-  SymbolSlab mergeResults() override {
-    SymbolSlab::Builder UniqueSymbols;
-    Executor.getToolResults()->forEachResult(
-        [&](llvm::StringRef Key, llvm::StringRef Value) {
-          llvm::yaml::Input Yin(Value);
-          auto Sym = cantFail(clang::clangd::symbolFromYAML(Yin));
-          auto ID = cantFail(clang::clangd::SymbolID::fromStr(Key));
-          if (const auto *Existing = UniqueSymbols.find(ID))
-            UniqueSymbols.insert(mergeSymbol(*Existing, Sym));
-          else
-            UniqueSymbols.insert(Sym);
-        });
-    return std::move(UniqueSymbols).build();
-  }
-
-private:
-  ToolExecutor &Executor;
-};
-
-/// Merges symbols for each translation unit as soon as the file is processed.
-/// Optimal choice for standalone tools.
-class OnTheFlyConsumer : public SymbolsConsumer {
-public:
-  void consumeSymbols(SymbolSlab Symbols) override {
-    std::lock_guard<std::mutex> Lock(Mut);
-    for (auto &&Sym : Symbols) {
-      if (const auto *Existing = Result.find(Sym.ID))
-        Result.insert(mergeSymbol(*Existing, Sym));
-      else
-        Result.insert(Sym);
-    }
-  }
-
-  SymbolSlab mergeResults() override {
-    std::lock_guard<std::mutex> Lock(Mut);
-    return std::move(Result).build();
-  }
+  // Awkward: we write the result in the destructor, because the executor
+  // takes ownership so it's the easiest way to get our data back out.
+  ~IndexActionFactory() { Result.Symbols = std::move(Symbols).build(); }
 
 private:
-  std::mutex Mut;
-  SymbolSlab::Builder Result;
+  IndexFileIn &Result;
+  std::mutex SymbolsMu;
+  SymbolSlab::Builder Symbols;
 };
 
 } // namespace
@@ -161,12 +89,10 @@ int main(int argc, const char **argv) {
   llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
 
   const char *Overview = R"(
-  This is an **experimental** tool to extract symbols from a whole project
-  for clangd (global code completion). It will be changed and deprecated
-  eventually. Don't use it in production code!
+  Creates an index of symbol information etc in a whole project.
+  This is **experimental** and not production-ready!
 
-  Example usage for building index for the whole project using CMake compile
-  commands:
+  Example usage for a project using CMake compile commands:
 
   $ clangd-indexer --executor=all-TUs compile_commands.json > index.yaml
 
@@ -174,7 +100,7 @@ int main(int argc, const char **argv) {
 
   $ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > index.yaml
 
-  Note: only symbols from header files will be collected.
+  Note: only symbols from header files will be indexed.
   )";
 
   auto Executor = clang::tooling::createExecutorFromCommandLineArgs(
@@ -191,31 +117,16 @@ int main(int argc, const char **argv) {
     return 1;
   }
 
-  if (clang::clangd::MergeOnTheFly && !Executor->get()->isSingleProcess()) {
-    llvm::errs()
-        << "Found multi-process executor, forcing the use of intermediate YAML "
-           "serialization instead of the on-the-fly merge.\n";
-    clang::clangd::MergeOnTheFly = false;
-  }
-
-  std::unique_ptr<clang::clangd::SymbolsConsumer> Consumer;
-  if (clang::clangd::MergeOnTheFly)
-    Consumer = llvm::make_unique<clang::clangd::OnTheFlyConsumer>();
-  else
-    Consumer =
-        llvm::make_unique<clang::clangd::ToolExecutorConsumer>(**Executor);
-
-  // Map phase: emit symbols found in each translation unit.
+  // Collect symbols found in each translation unit, merging as we go.
+  clang::clangd::IndexFileIn Data;
   auto Err = Executor->get()->execute(
-      llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(*Consumer));
+      llvm::make_unique<clang::clangd::IndexActionFactory>(Data));
   if (Err) {
     llvm::errs() << llvm::toString(std::move(Err)) << "\n";
   }
-  // Reduce phase: combine symbols with the same IDs.
-  auto UniqueSymbols = Consumer->mergeResults();
-  // Output phase: emit result symbols.
-  clang::clangd::IndexFileOut Out;
-  Out.Symbols = &UniqueSymbols;
+
+  // Emit collected data.
+  clang::clangd::IndexFileOut Out(Data);
   Out.Format = clang::clangd::Format;
   llvm::outs() << Out;
   return 0;




More information about the cfe-commits mailing list