[clang] [clang][deps] Store common, partially-formed invocation (PR #65677)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 7 14:20:57 PDT 2023


https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/65677:

We create one `CompilerInvocation` for each modular dependency we discover. This means we create a lot of copies, even though most of the invocation is the same between modules. This patch makes use of the copy-on-write flavor of `CompilerInvocation` (`CowCompilerInvocation`) to share the common parts, reducing memory usage and speeding up the scan.

>From 7c54eae50d15aabedaff256fcc2465d2399f69b8 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 24 Aug 2023 17:33:38 -0700
Subject: [PATCH] [clang][deps] Store common, partially-formed invocation

---
 .../DependencyScanning/ModuleDepCollector.h   |  23 ++--
 .../DependencyScanning/ModuleDepCollector.cpp | 117 +++++++++++-------
 2 files changed, 84 insertions(+), 56 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
index 914d55eadefe851..ef75c580552181c 100644
--- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
+++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
@@ -241,8 +241,11 @@ class ModuleDepCollector final : public DependencyCollector {
   llvm::SetVector<const Module *> DirectModularDeps;
   /// Options that control the dependency output generation.
   std::unique_ptr<DependencyOutputOptions> Opts;
-  /// The original Clang invocation passed to dependency scanner.
-  CompilerInvocation OriginalInvocation;
+  /// A Clang invocation that's based on the original TU invocation and that has
+  /// been partially transformed into one that can perform an explicit build of
+  /// a discovered modular dependency. Note that this still needs to be adjusted
+  /// for each individual module.
+  CowCompilerInvocation CommonInvocation;
   /// Whether to optimize the modules' command-line arguments.
   bool OptimizeArgs;
   /// Whether to set up command-lines to load PCM files eagerly.
@@ -262,12 +265,11 @@ class ModuleDepCollector final : public DependencyCollector {
   /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
   void addFileDep(ModuleDeps &MD, StringRef Path);
 
-  /// Constructs a CompilerInvocation that can be used to build the given
-  /// module, excluding paths to discovered modular dependencies that are yet to
-  /// be built.
-  CompilerInvocation makeInvocationForModuleBuildWithoutOutputs(
+  /// Get a Clang invocation adjusted to build the given modular dependency.
+  /// This excludes paths that are yet-to-be-provided by the build system.
+  CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs(
       const ModuleDeps &Deps,
-      llvm::function_ref<void(CompilerInvocation &)> Optimize) const;
+      llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const;
 
   /// Collect module map files for given modules.
   llvm::DenseSet<const FileEntry *>
@@ -279,13 +281,16 @@ class ModuleDepCollector final : public DependencyCollector {
   /// Add module files (pcm) to the invocation, if needed.
   void addModuleFiles(CompilerInvocation &CI,
                       ArrayRef<ModuleID> ClangModuleDeps) const;
+  void addModuleFiles(CowCompilerInvocation &CI,
+                      ArrayRef<ModuleID> ClangModuleDeps) const;
 
   /// Add paths that require looking up outputs to the given dependencies.
-  void addOutputPaths(CompilerInvocation &CI, ModuleDeps &Deps);
+  void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps);
 
   /// Compute the context hash for \p Deps, and create the mapping
   /// \c ModuleDepsByID[Deps.ID] = &Deps.
-  void associateWithContextHash(const CompilerInvocation &CI, ModuleDeps &Deps);
+  void associateWithContextHash(const CowCompilerInvocation &CI,
+                                ModuleDeps &Deps);
 };
 
 } // end namespace dependencies
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index e13f7c74e9b92e2..59cd6e96466af89 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -54,18 +54,18 @@ static std::vector<std::string> splitString(std::string S, char Separator) {
   return Result;
 }
 
-void ModuleDepCollector::addOutputPaths(CompilerInvocation &CI,
+void ModuleDepCollector::addOutputPaths(CowCompilerInvocation &CI,
                                         ModuleDeps &Deps) {
-  CI.getFrontendOpts().OutputFile =
+  CI.getMutFrontendOpts().OutputFile =
       Controller.lookupModuleOutput(Deps.ID, ModuleOutputKind::ModuleFile);
   if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty())
-    CI.getDiagnosticOpts().DiagnosticSerializationFile =
+    CI.getMutDiagnosticOpts().DiagnosticSerializationFile =
         Controller.lookupModuleOutput(
             Deps.ID, ModuleOutputKind::DiagnosticSerializationFile);
   if (!CI.getDependencyOutputOpts().OutputFile.empty()) {
-    CI.getDependencyOutputOpts().OutputFile = Controller.lookupModuleOutput(
+    CI.getMutDependencyOutputOpts().OutputFile = Controller.lookupModuleOutput(
         Deps.ID, ModuleOutputKind::DependencyFile);
-    CI.getDependencyOutputOpts().Targets =
+    CI.getMutDependencyOutputOpts().Targets =
         splitString(Controller.lookupModuleOutput(
                         Deps.ID, ModuleOutputKind::DependencyTargets),
                     '\0');
@@ -74,18 +74,13 @@ void ModuleDepCollector::addOutputPaths(CompilerInvocation &CI,
       // Fallback to -o as dependency target, as in the driver.
       SmallString<128> Target;
       quoteMakeTarget(CI.getFrontendOpts().OutputFile, Target);
-      CI.getDependencyOutputOpts().Targets.push_back(std::string(Target));
+      CI.getMutDependencyOutputOpts().Targets.push_back(std::string(Target));
     }
   }
 }
 
-CompilerInvocation
-ModuleDepCollector::makeInvocationForModuleBuildWithoutOutputs(
-    const ModuleDeps &Deps,
-    llvm::function_ref<void(CompilerInvocation &)> Optimize) const {
-  // Make a deep copy of the original Clang invocation.
-  CompilerInvocation CI(OriginalInvocation);
-
+static CowCompilerInvocation
+makeCommonInvocationForModuleBuild(CompilerInvocation CI) {
   CI.resetNonModularOptions();
   CI.clearImplicitModuleBuildOptions();
 
@@ -117,14 +112,37 @@ ModuleDepCollector::makeInvocationForModuleBuildWithoutOutputs(
   CI.getFrontendOpts().ARCMTAction = FrontendOptions::ARCMT_None;
   CI.getFrontendOpts().ObjCMTAction = FrontendOptions::ObjCMT_None;
   CI.getFrontendOpts().MTMigrateDir.clear();
-  CI.getLangOpts().ModuleName = Deps.ID.ModuleName;
-  CI.getFrontendOpts().IsSystemModule = Deps.IsSystem;
+
+  // Remove any macro definitions that are explicitly ignored.
+  if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) {
+    llvm::erase_if(
+        CI.getPreprocessorOpts().Macros,
+        [&CI](const std::pair<std::string, bool> &Def) {
+          StringRef MacroDef = Def.first;
+          return CI.getHeaderSearchOpts().ModulesIgnoreMacros.contains(
+              llvm::CachedHashString(MacroDef.split('=').first));
+        });
+    // Remove the now unused option.
+    CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear();
+  }
+
+  return CI;
+}
+
+CowCompilerInvocation
+ModuleDepCollector::getInvocationAdjustedForModuleBuildWithoutOutputs(
+    const ModuleDeps &Deps,
+    llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const {
+  CowCompilerInvocation CI = CommonInvocation;
+
+  CI.getMutLangOpts().ModuleName = Deps.ID.ModuleName;
+  CI.getMutFrontendOpts().IsSystemModule = Deps.IsSystem;
 
   // Inputs
   InputKind ModuleMapInputKind(CI.getFrontendOpts().DashX.getLanguage(),
                                InputKind::Format::ModuleMap);
-  CI.getFrontendOpts().Inputs.emplace_back(Deps.ClangModuleMapFile,
-                                           ModuleMapInputKind);
+  CI.getMutFrontendOpts().Inputs.emplace_back(Deps.ClangModuleMapFile,
+                                              ModuleMapInputKind);
 
   auto CurrentModuleMapEntry =
       ScanInstance.getFileManager().getFile(Deps.ClangModuleMapFile);
@@ -150,36 +168,25 @@ ModuleDepCollector::makeInvocationForModuleBuildWithoutOutputs(
         !DepModuleMapFiles.contains(*ModuleMapEntry))
       continue;
 
-    CI.getFrontendOpts().ModuleMapFiles.emplace_back(ModuleMapFile);
+    CI.getMutFrontendOpts().ModuleMapFiles.emplace_back(ModuleMapFile);
   }
 
   // Report the prebuilt modules this module uses.
   for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
-    CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
+    CI.getMutFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
 
   // Add module file inputs from dependencies.
   addModuleFiles(CI, Deps.ClangModuleDeps);
 
-  // Remove any macro definitions that are explicitly ignored.
-  if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) {
-    llvm::erase_if(
-        CI.getPreprocessorOpts().Macros,
-        [&CI](const std::pair<std::string, bool> &Def) {
-          StringRef MacroDef = Def.first;
-          return CI.getHeaderSearchOpts().ModulesIgnoreMacros.contains(
-              llvm::CachedHashString(MacroDef.split('=').first));
-        });
-    // Remove the now unused option.
-    CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear();
+  if (!CI.getDiagnosticOpts().SystemHeaderWarningsModules.empty()) {
+    // Apply -Wsystem-headers-in-module for the current module.
+    if (llvm::is_contained(CI.getDiagnosticOpts().SystemHeaderWarningsModules,
+                           Deps.ID.ModuleName))
+      CI.getMutDiagnosticOpts().Warnings.push_back("system-headers");
+    // Remove the now unused option(s).
+    CI.getMutDiagnosticOpts().SystemHeaderWarningsModules.clear();
   }
 
-  // Apply -Wsystem-headers-in-module for the current module.
-  if (llvm::is_contained(CI.getDiagnosticOpts().SystemHeaderWarningsModules,
-                         Deps.ID.ModuleName))
-    CI.getDiagnosticOpts().Warnings.push_back("system-headers");
-  // Remove the now unused option(s).
-  CI.getDiagnosticOpts().SystemHeaderWarningsModules.clear();
-
   Optimize(CI);
 
   return CI;
@@ -224,6 +231,19 @@ void ModuleDepCollector::addModuleFiles(
   }
 }
 
+void ModuleDepCollector::addModuleFiles(
+    CowCompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
+  for (const ModuleID &MID : ClangModuleDeps) {
+    std::string PCMPath =
+        Controller.lookupModuleOutput(MID, ModuleOutputKind::ModuleFile);
+    if (EagerLoadModules)
+      CI.getMutFrontendOpts().ModuleFiles.push_back(std::move(PCMPath));
+    else
+      CI.getMutHeaderSearchOpts().PrebuiltModuleFiles.insert(
+          {MID.ModuleName, std::move(PCMPath)});
+  }
+}
+
 static bool needsModules(FrontendInputFile FIF) {
   switch (FIF.getKind().getLanguage()) {
   case Language::Unknown:
@@ -264,7 +284,7 @@ void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
 }
 
 static std::string getModuleContextHash(const ModuleDeps &MD,
-                                        const CompilerInvocation &CI,
+                                        const CowCompilerInvocation &CI,
                                         bool EagerLoadModules) {
   llvm::HashBuilder<llvm::TruncatedBLAKE3<16>,
                     llvm::support::endianness::native>
@@ -304,8 +324,8 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
   return toString(llvm::APInt(sizeof(Words) * 8, Words), 36, /*Signed=*/false);
 }
 
-void ModuleDepCollector::associateWithContextHash(const CompilerInvocation &CI,
-                                                  ModuleDeps &Deps) {
+void ModuleDepCollector::associateWithContextHash(
+    const CowCompilerInvocation &CI, ModuleDeps &Deps) {
   Deps.ID.ContextHash = getModuleContextHash(Deps, CI, EagerLoadModules);
   bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
   (void)Inserted;
@@ -498,12 +518,13 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
         MD.ModuleMapFileDeps.emplace_back(IFI.FilenameAsRequested);
       });
 
-  CompilerInvocation CI = MDC.makeInvocationForModuleBuildWithoutOutputs(
-      MD, [&](CompilerInvocation &BuildInvocation) {
-        if (MDC.OptimizeArgs)
-          optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(),
-                                   *MDC.ScanInstance.getASTReader(), *MF);
-      });
+  CowCompilerInvocation CI =
+      MDC.getInvocationAdjustedForModuleBuildWithoutOutputs(
+          MD, [&](CowCompilerInvocation &BuildInvocation) {
+            if (MDC.OptimizeArgs)
+              optimizeHeaderSearchOpts(BuildInvocation.getMutHeaderSearchOpts(),
+                                       *MDC.ScanInstance.getASTReader(), *MF);
+          });
 
   MDC.associateWithContextHash(CI, MD);
 
@@ -601,7 +622,9 @@ ModuleDepCollector::ModuleDepCollector(
     DependencyActionController &Controller, CompilerInvocation OriginalCI,
     bool OptimizeArgs, bool EagerLoadModules, bool IsStdModuleP1689Format)
     : ScanInstance(ScanInstance), Consumer(C), Controller(Controller),
-      Opts(std::move(Opts)), OriginalInvocation(std::move(OriginalCI)),
+      Opts(std::move(Opts)),
+      CommonInvocation(
+          makeCommonInvocationForModuleBuild(std::move(OriginalCI))),
       OptimizeArgs(OptimizeArgs), EagerLoadModules(EagerLoadModules),
       IsStdModuleP1689Format(IsStdModuleP1689Format) {}
 



More information about the cfe-commits mailing list