[clang] [clang][deps] Generate command lines lazily (PR #65691)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 7 15:59:34 PDT 2023


https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/65691:

This patch makes the generation of command lines for modular dependencies lazy (on-demand). That operation is somewhat expensive and, prior to this patch, was performed multiple times for identical `ModuleDeps` (i.e. when the same module was imported from multiple different TUs).

From 223e26abc31f4fccfb65e98911c853a1c05080c4 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Fri, 1 Sep 2023 13:01:24 -0700
Subject: [PATCH] [clang][deps] Generate command-lines lazily

---
 .../DependencyScanning/ModuleDepCollector.h   | 14 +++++++---
 .../DependencyScanning/ModuleDepCollector.cpp | 10 ++++++-
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 27 ++++++++++++-------
 3 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
index ef75c580552181c..4e9540f692f1087 100644
--- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
+++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
@@ -20,9 +20,11 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/raw_ostream.h"
+#include <memory>
 #include <optional>
 #include <string>
 #include <unordered_map>
+#include <variant>
 
 namespace clang {
 namespace tooling {
@@ -136,9 +138,15 @@ struct ModuleDeps {
   /// determined that the differences are benign for this compilation.
   std::vector<ModuleID> ClangModuleDeps;
 
-  /// Compiler invocation that can be used to build this module. Does not
-  /// include argv[0].
-  std::vector<std::string> BuildArguments;
+  /// Get (or compute) the compiler invocation that can be used to build this
+  /// module. Does not include argv[0].
+  const std::vector<std::string> &getBuildArguments();
+
+private:
+  friend class ModuleDepCollectorPP;
+
+  std::variant<std::shared_ptr<CowCompilerInvocation>, std::vector<std::string>>
+      BuildInvocationOrArguments;
 };
 
 class ModuleDepCollector;
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index a7248860ad4b567..dd443d85c8633fc 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -21,6 +21,13 @@ using namespace clang;
 using namespace tooling;
 using namespace dependencies;
 
+const std::vector<std::string> &ModuleDeps::getBuildArguments() {
+  if (auto *CI = std::get_if<std::shared_ptr<CowCompilerInvocation>>(
+          &BuildInvocationOrArguments))
+    BuildInvocationOrArguments = (*CI)->getCC1CommandLine();
+  return std::get<std::vector<std::string>>(BuildInvocationOrArguments);
+}
+
 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts,
                                      ASTReader &Reader,
                                      const serialization::ModuleFile &MF) {
@@ -532,7 +539,8 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
   // Finish the compiler invocation. Requires dependencies and the context hash.
   MDC.addOutputPaths(CI, MD);
 
-  MD.BuildArguments = CI.getCC1CommandLine();
+  MD.BuildInvocationOrArguments =
+      std::make_unique<CowCompilerInvocation>(std::move(CI));
 
   return MD.ID;
 }
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index dab3de42b7fa1af..0213bb9c9616d67 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -351,14 +351,23 @@ class FullDeps {
   }
 
   void mergeDeps(ModuleDepsGraph Graph, size_t InputIndex) {
-    std::unique_lock<std::mutex> ul(Lock);
-    for (const ModuleDeps &MD : Graph) {
-      auto I = Modules.find({MD.ID, 0});
-      if (I != Modules.end()) {
-        I->first.InputIndex = std::min(I->first.InputIndex, InputIndex);
-        continue;
+    std::vector<ModuleDeps *> NewMDs;
+    {
+      std::unique_lock<std::mutex> ul(Lock);
+      for (const ModuleDeps &MD : Graph) {
+        auto I = Modules.find({MD.ID, 0});
+        if (I != Modules.end()) {
+          I->first.InputIndex = std::min(I->first.InputIndex, InputIndex);
+          continue;
+        }
+        auto Res = Modules.insert(I, {{MD.ID, InputIndex}, std::move(MD)});
+        NewMDs.push_back(&Res->second);
       }
-      Modules.insert(I, {{MD.ID, InputIndex}, std::move(MD)});
     }
+    // The first \c getBuildArguments call is somewhat expensive. Run it on this
+    // worker thread (not the main one) and only after \c Lock is released, so
+    // other workers are not blocked while the command line is generated.
+    for (ModuleDeps *MD : NewMDs)
+      (void)MD->getBuildArguments();
   }
 
@@ -382,7 +391,7 @@ class FullDeps {
                                             /*ShouldOwnClient=*/false);
 
     for (auto &&M : Modules)
-      if (roundTripCommand(M.second.BuildArguments, *Diags))
+      if (roundTripCommand(M.second.getBuildArguments(), *Diags))
         return true;
 
     for (auto &&I : Inputs)
@@ -411,7 +420,7 @@ class FullDeps {
           {"file-deps", toJSONSorted(MD.FileDeps)},
           {"clang-module-deps", toJSONSorted(MD.ClangModuleDeps)},
           {"clang-modulemap-file", MD.ClangModuleMapFile},
-          {"command-line", MD.BuildArguments},
+          {"command-line", MD.getBuildArguments()},
       };
       OutModules.push_back(std::move(O));
     }



More information about the cfe-commits mailing list