[clang-tools-extra] r340600 - [clangd] Allow to merge symbols on-the-fly in global-symbol-builder

Ilya Biryukov via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 24 02:03:54 PDT 2018


Author: ibiryukov
Date: Fri Aug 24 02:03:54 2018
New Revision: 340600

URL: http://llvm.org/viewvc/llvm-project?rev=340600&view=rev
Log:
[clangd] Allow to merge symbols on-the-fly in global-symbol-builder

Summary:
The new mode avoids serializing and deserializing YAML.
This results in better performance and less memory usage. The reduce
phase is now almost instant.

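The default is to use the old mode going through YAML serialization to
allow migrating MapReduce clients that require the old mode to operate
properly. After we migrate the clients, we can switch the default to
the new mode.

Note: this summary appears to predate the final patch. In the diff below,
the `merge-on-the-fly` option is initialized to true, and the old
YAML-based mode is forced only when the executor is not single-process.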

Reviewers: hokein, ioeric, kbobyrev, sammccall

Reviewed By: ioeric

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D51155

Modified:
    clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp

Modified: clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp?rev=340600&r1=340599&r2=340600&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp (original)
+++ clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp Fri Aug 24 02:03:54 2018
@@ -49,9 +49,33 @@ static llvm::cl::opt<std::string> Assume
                    "not given, such headers will have relative paths."),
     llvm::cl::init(""));
 
+static llvm::cl::opt<bool> MergeOnTheFly(
+    "merge-on-the-fly",
+    llvm::cl::desc(
+        "Merges symbols for each processed translation unit as soon "
+        "they become available. This results in a smaller memory "
+        "usage and an almost instant reduce stage. Optimal for running as a "
+        "standalone tool, but cannot be used with multi-process executors like "
+        "MapReduce."),
+    llvm::cl::init(true), llvm::cl::Hidden);
+
+/// Responsible for aggregating symbols from each processed file and producing
+/// the final results. All methods in this class must be thread-safe, because
+/// 'consumeSymbols' may be called from multiple threads.
+class SymbolsConsumer {
+public:
+  virtual ~SymbolsConsumer() = default;
+
+  /// Consume a SymbolSlab built for a file.
+  virtual void consumeSymbols(SymbolSlab Symbols) = 0;
+  /// Produce the resulting symbol slab by combining occurrences of the same
+  /// symbols across translation units.
+  virtual SymbolSlab mergeResults() = 0;
+};
+
 class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
 public:
-  SymbolIndexActionFactory(tooling::ExecutionContext *Ctx) : Ctx(Ctx) {}
+  SymbolIndexActionFactory(SymbolsConsumer &Consumer) : Consumer(Consumer) {}
 
   clang::FrontendAction *create() override {
     // Wraps the index action and reports collected symbols to the execution
@@ -61,10 +85,10 @@ public:
       WrappedIndexAction(std::shared_ptr<SymbolCollector> C,
                          std::unique_ptr<CanonicalIncludes> Includes,
                          const index::IndexingOptions &Opts,
-                         tooling::ExecutionContext *Ctx)
+                         SymbolsConsumer &Consumer)
           : WrapperFrontendAction(
                 index::createIndexingAction(C, Opts, nullptr)),
-            Ctx(Ctx), Collector(C), Includes(std::move(Includes)),
+            Consumer(Consumer), Collector(C), Includes(std::move(Includes)),
             PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {}
 
       std::unique_ptr<ASTConsumer>
@@ -91,14 +115,11 @@ public:
           return;
         }
 
-        auto Symbols = Collector->takeSymbols();
-        for (const auto &Sym : Symbols) {
-          Ctx->reportResult(Sym.ID.str(), SymbolToYAML(Sym));
-        }
+        Consumer.consumeSymbols(Collector->takeSymbols());
       }
 
     private:
-      tooling::ExecutionContext *Ctx;
+      SymbolsConsumer &Consumer;
       std::shared_ptr<SymbolCollector> Collector;
       std::unique_ptr<CanonicalIncludes> Includes;
       std::unique_ptr<CommentHandler> PragmaHandler;
@@ -118,30 +139,72 @@ public:
     CollectorOpts.Includes = Includes.get();
     return new WrappedIndexAction(
         std::make_shared<SymbolCollector>(std::move(CollectorOpts)),
-        std::move(Includes), IndexOpts, Ctx);
+        std::move(Includes), IndexOpts, Consumer);
+  }
+
+  SymbolsConsumer &Consumer;
+};
+
+/// Stashes per-file results inside ExecutionContext, merges all of them at the
+/// end. Useful for running on MapReduce infrastructure to avoid keeping symbols
+/// from multiple files in memory.
+class ToolExecutorConsumer : public SymbolsConsumer {
+public:
+  ToolExecutorConsumer(ToolExecutor &Executor) : Executor(Executor) {}
+
+  void consumeSymbols(SymbolSlab Symbols) override {
+    for (const auto &Sym : Symbols)
+      Executor.getExecutionContext()->reportResult(Sym.ID.str(),
+                                                   SymbolToYAML(Sym));
+  }
+
+  SymbolSlab mergeResults() override {
+    SymbolSlab::Builder UniqueSymbols;
+    llvm::BumpPtrAllocator Arena;
+    Symbol::Details Scratch;
+    Executor.getToolResults()->forEachResult(
+        [&](llvm::StringRef Key, llvm::StringRef Value) {
+          Arena.Reset();
+          llvm::yaml::Input Yin(Value, &Arena);
+          auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena);
+          clang::clangd::SymbolID ID;
+          Key >> ID;
+          if (const auto *Existing = UniqueSymbols.find(ID))
+            UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch));
+          else
+            UniqueSymbols.insert(Sym);
+        });
+    return std::move(UniqueSymbols).build();
   }
 
-  tooling::ExecutionContext *Ctx;
+private:
+  ToolExecutor &Executor;
 };
 
-// Combine occurrences of the same symbol across translation units.
-SymbolSlab mergeSymbols(tooling::ToolResults *Results) {
-  SymbolSlab::Builder UniqueSymbols;
-  llvm::BumpPtrAllocator Arena;
-  Symbol::Details Scratch;
-  Results->forEachResult([&](llvm::StringRef Key, llvm::StringRef Value) {
-    Arena.Reset();
-    llvm::yaml::Input Yin(Value, &Arena);
-    auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena);
-    clang::clangd::SymbolID ID;
-    Key >> ID;
-    if (const auto *Existing = UniqueSymbols.find(ID))
-      UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch));
-    else
-      UniqueSymbols.insert(Sym);
-  });
-  return std::move(UniqueSymbols).build();
-}
+/// Merges symbols for each translation unit as soon as the file is processed.
+/// Optimal choice for standalone tools.
+class OnTheFlyConsumer : public SymbolsConsumer {
+public:
+  void consumeSymbols(SymbolSlab Symbols) override {
+    std::lock_guard<std::mutex> Lock(Mut);
+    for (auto &&Sym : Symbols) {
+      Symbol::Details Scratch;
+      if (const auto *Existing = Result.find(Sym.ID))
+        Result.insert(mergeSymbol(*Existing, Sym, &Scratch));
+      else
+        Result.insert(Sym);
+    }
+  }
+
+  SymbolSlab mergeResults() override {
+    std::lock_guard<std::mutex> Lock(Mut);
+    return std::move(Result).build();
+  }
+
+private:
+  std::mutex Mut;
+  SymbolSlab::Builder Result;
+};
 
 } // namespace
 } // namespace clangd
@@ -181,18 +244,28 @@ int main(int argc, const char **argv) {
     return 1;
   }
 
+  if (clang::clangd::MergeOnTheFly && !Executor->get()->isSingleProcess()) {
+    llvm::errs()
+        << "Found multi-process executor, forcing the use of intermediate YAML "
+           "serialization instead of the on-the-fly merge.\n";
+    clang::clangd::MergeOnTheFly = false;
+  }
+
+  std::unique_ptr<clang::clangd::SymbolsConsumer> Consumer;
+  if (clang::clangd::MergeOnTheFly)
+    Consumer = llvm::make_unique<clang::clangd::OnTheFlyConsumer>();
+  else
+    Consumer =
+        llvm::make_unique<clang::clangd::ToolExecutorConsumer>(**Executor);
+
   // Map phase: emit symbols found in each translation unit.
   auto Err = Executor->get()->execute(
-      llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(
-          Executor->get()->getExecutionContext()));
+      llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(*Consumer));
   if (Err) {
     llvm::errs() << llvm::toString(std::move(Err)) << "\n";
   }
-
-  // Reduce phase: combine symbols using the ID as a key.
-  auto UniqueSymbols =
-      clang::clangd::mergeSymbols(Executor->get()->getToolResults());
-
+  // Reduce phase: combine symbols with the same IDs.
+  auto UniqueSymbols = Consumer->mergeResults();
   // Output phase: emit YAML for result symbols.
   SymbolsToYAML(UniqueSymbols, llvm::outs());
   return 0;




More information about the cfe-commits mailing list