[clang] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use thread pool in P1689 per file mode (PR #84285)

Chuanqi Xu via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 7 18:47:56 PST 2024


https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/84285

>From dd9711be2368a299b408d0ff06ec9c1c0540083b Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Thu, 7 Mar 2024 15:19:28 +0800
Subject: [PATCH 1/2] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use
 thread pool in P1689 per file mode

I suddenly found that the clang scan deps may use all concurrent threads
to scan the files. It makes sense in the batch mode. But in P1689
per file mode, it simply wastes times.
---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 204 +++++++++---------
 1 file changed, 108 insertions(+), 96 deletions(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index d042fecc3dbe63..843816a8ed6515 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -744,6 +744,9 @@ getCompilationDataBase(int argc, char **argv, std::string &ErrorMessage) {
     return nullptr;
   }
 
+  // Only 1 threads is required if P1689 per file mode.
+  NumThreads = 1;
+
   // There might be multiple jobs for a compilation. Extract the specified
   // output filename from the last job.
   auto LastCmd = C->getJobs().end();
@@ -867,13 +870,6 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
 
-  DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
-                                    EagerLoadModules);
-  llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
-  std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
-  for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
-    WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
-
   std::vector<tooling::CompileCommand> Inputs =
       AdjustingCompilations->getAllCompileCommands();
 
@@ -893,102 +889,118 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   if (Format == ScanningOutputFormat::Full)
     FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
 
-  if (Verbose) {
-    llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
-                 << " files using " << Pool.getMaxConcurrency() << " workers\n";
-  }
-
-  llvm::Timer T;
-  T.startTimer();
-
-  for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
-    Pool.async([&, I]() {
-      llvm::DenseSet<ModuleID> AlreadySeenModules;
-      while (auto MaybeInputIndex = GetNextInputIndex()) {
-        size_t LocalIndex = *MaybeInputIndex;
-        const tooling::CompileCommand *Input = &Inputs[LocalIndex];
-        std::string Filename = std::move(Input->Filename);
-        std::string CWD = std::move(Input->Directory);
-
-        std::optional<StringRef> MaybeModuleName;
-        if (!ModuleName.empty())
-          MaybeModuleName = ModuleName;
-
-        std::string OutputDir(ModuleFilesDir);
-        if (OutputDir.empty())
-          OutputDir = getModuleCachePath(Input->CommandLine);
-        auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
-          return ::lookupModuleOutput(MID, MOK, OutputDir);
-        };
-
-        // Run the tool on it.
-        if (Format == ScanningOutputFormat::Make) {
-          auto MaybeFile =
-              WorkerTools[I]->getDependencyFile(Input->CommandLine, CWD);
-          if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
-                                             Errs))
-            HadErrors = true;
-        } else if (Format == ScanningOutputFormat::P1689) {
-          // It is useful to generate the make-format dependency output during
-          // the scanning for P1689. Otherwise the users need to scan again for
-          // it. We will generate the make-format dependency output if we find
-          // `-MF` in the command lines.
-          std::string MakeformatOutputPath;
-          std::string MakeformatOutput;
-
-          auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile(
-              *Input, CWD, MakeformatOutput, MakeformatOutputPath);
-
-          if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
-            HadErrors = true;
+  std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
 
-          if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() &&
-              !HadErrors) {
-            static std::mutex Lock;
-            // With compilation database, we may open different files
-            // concurrently or we may write the same file concurrently. So we
-            // use a map here to allow multiple compile commands to write to the
-            // same file. Also we need a lock here to avoid data race.
-            static llvm::StringMap<llvm::raw_fd_ostream> OSs;
-            std::unique_lock<std::mutex> LockGuard(Lock);
-
-            auto OSIter = OSs.find(MakeformatOutputPath);
-            if (OSIter == OSs.end()) {
-              std::error_code EC;
-              OSIter = OSs.try_emplace(MakeformatOutputPath,
-                                       MakeformatOutputPath, EC)
-                           .first;
-              if (EC)
-                llvm::errs()
-                    << "Failed to open P1689 make format output file \""
-                    << MakeformatOutputPath << "\" for " << EC.message()
-                    << "\n";
-            }
+  auto ScanningTask = [&](unsigned I) {
+    llvm::DenseSet<ModuleID> AlreadySeenModules;
+    while (auto MaybeInputIndex = GetNextInputIndex()) {
+      size_t LocalIndex = *MaybeInputIndex;
+      const tooling::CompileCommand *Input = &Inputs[LocalIndex];
+      std::string Filename = std::move(Input->Filename);
+      std::string CWD = std::move(Input->Directory);
+
+      std::optional<StringRef> MaybeModuleName;
+      if (!ModuleName.empty())
+        MaybeModuleName = ModuleName;
+
+      std::string OutputDir(ModuleFilesDir);
+      if (OutputDir.empty())
+        OutputDir = getModuleCachePath(Input->CommandLine);
+      auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
+        return ::lookupModuleOutput(MID, MOK, OutputDir);
+      };
 
-            SharedStream MakeformatOS(OSIter->second);
-            llvm::Expected<std::string> MaybeOutput(MakeformatOutput);
-            if (handleMakeDependencyToolResult(Filename, MaybeOutput,
-                                               MakeformatOS, Errs))
-              HadErrors = true;
+      // Run the tool on it.
+      if (Format == ScanningOutputFormat::Make) {
+        auto MaybeFile =
+            WorkerTools[I]->getDependencyFile(Input->CommandLine, CWD);
+        if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
+                                           Errs))
+          HadErrors = true;
+      } else if (Format == ScanningOutputFormat::P1689) {
+        // It is useful to generate the make-format dependency output during
+        // the scanning for P1689. Otherwise the users need to scan again for
+        // it. We will generate the make-format dependency output if we find
+        // `-MF` in the command lines.
+        std::string MakeformatOutputPath;
+        std::string MakeformatOutput;
+
+        auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile(
+            *Input, CWD, MakeformatOutput, MakeformatOutputPath);
+
+        if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
+          HadErrors = true;
+
+        if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() &&
+            !HadErrors) {
+          static std::mutex Lock;
+          // With compilation database, we may open different files
+          // concurrently or we may write the same file concurrently. So we
+          // use a map here to allow multiple compile commands to write to the
+          // same file. Also we need a lock here to avoid data race.
+          static llvm::StringMap<llvm::raw_fd_ostream> OSs;
+          std::unique_lock<std::mutex> LockGuard(Lock);
+
+          auto OSIter = OSs.find(MakeformatOutputPath);
+          if (OSIter == OSs.end()) {
+            std::error_code EC;
+            OSIter =
+                OSs.try_emplace(MakeformatOutputPath, MakeformatOutputPath, EC)
+                    .first;
+            if (EC)
+              llvm::errs() << "Failed to open P1689 make format output file \""
+                           << MakeformatOutputPath << "\" for " << EC.message()
+                           << "\n";
           }
-        } else if (MaybeModuleName) {
-          auto MaybeModuleDepsGraph = WorkerTools[I]->getModuleDependencies(
-              *MaybeModuleName, Input->CommandLine, CWD, AlreadySeenModules,
-              LookupOutput);
-          if (handleModuleResult(*MaybeModuleName, MaybeModuleDepsGraph, *FD,
-                                 LocalIndex, DependencyOS, Errs))
-            HadErrors = true;
-        } else {
-          auto MaybeTUDeps = WorkerTools[I]->getTranslationUnitDependencies(
-              Input->CommandLine, CWD, AlreadySeenModules, LookupOutput);
-          if (handleTranslationUnitResult(Filename, MaybeTUDeps, *FD,
-                                          LocalIndex, DependencyOS, Errs))
+
+          SharedStream MakeformatOS(OSIter->second);
+          llvm::Expected<std::string> MaybeOutput(MakeformatOutput);
+          if (handleMakeDependencyToolResult(Filename, MaybeOutput,
+                                             MakeformatOS, Errs))
             HadErrors = true;
         }
+      } else if (MaybeModuleName) {
+        auto MaybeModuleDepsGraph = WorkerTools[I]->getModuleDependencies(
+            *MaybeModuleName, Input->CommandLine, CWD, AlreadySeenModules,
+            LookupOutput);
+        if (handleModuleResult(*MaybeModuleName, MaybeModuleDepsGraph, *FD,
+                               LocalIndex, DependencyOS, Errs))
+          HadErrors = true;
+      } else {
+        auto MaybeTUDeps = WorkerTools[I]->getTranslationUnitDependencies(
+            Input->CommandLine, CWD, AlreadySeenModules, LookupOutput);
+        if (handleTranslationUnitResult(Filename, MaybeTUDeps, *FD, LocalIndex,
+                                        DependencyOS, Errs))
+          HadErrors = true;
       }
-    });
+    }
+  };
+
+  DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
+                                    EagerLoadModules);
+
+  llvm::Timer T;
+  T.startTimer();
+
+  if (NumThreads == 1) {
+    WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
+    ScanningTask(0);
+  } else {
+    llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
+    for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
+      WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
+
+    if (Verbose) {
+      llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
+                   << " files using " << Pool.getMaxConcurrency()
+                   << " workers\n";
+    }
+
+    for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
+      Pool.async([I, ScanningTask]() { ScanningTask(I); });
+    }
+    Pool.wait();
   }
-  Pool.wait();
 
   T.stopTimer();
   if (PrintTiming)

>From e875b07ad16c7c66d8d9dd70efca30276981f639 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Fri, 8 Mar 2024 10:47:30 +0800
Subject: [PATCH 2/2] Address comments

---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 25 ++++++++++---------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 843816a8ed6515..2fd85c8e651c75 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -889,9 +889,7 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   if (Format == ScanningOutputFormat::Full)
     FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
 
-  std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
-
-  auto ScanningTask = [&](unsigned I) {
+  auto ScanningTask = [&](DependencyScanningTool &WorkerTool) {
     llvm::DenseSet<ModuleID> AlreadySeenModules;
     while (auto MaybeInputIndex = GetNextInputIndex()) {
       size_t LocalIndex = *MaybeInputIndex;
@@ -913,7 +911,7 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
       // Run the tool on it.
       if (Format == ScanningOutputFormat::Make) {
         auto MaybeFile =
-            WorkerTools[I]->getDependencyFile(Input->CommandLine, CWD);
+            WorkerTool.getDependencyFile(Input->CommandLine, CWD);
         if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
                                            Errs))
           HadErrors = true;
@@ -925,7 +923,7 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
         std::string MakeformatOutputPath;
         std::string MakeformatOutput;
 
-        auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile(
+        auto MaybeRule = WorkerTool.getP1689ModuleDependencyFile(
             *Input, CWD, MakeformatOutput, MakeformatOutputPath);
 
         if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
@@ -960,14 +958,14 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
             HadErrors = true;
         }
       } else if (MaybeModuleName) {
-        auto MaybeModuleDepsGraph = WorkerTools[I]->getModuleDependencies(
+        auto MaybeModuleDepsGraph = WorkerTool.getModuleDependencies(
             *MaybeModuleName, Input->CommandLine, CWD, AlreadySeenModules,
             LookupOutput);
         if (handleModuleResult(*MaybeModuleName, MaybeModuleDepsGraph, *FD,
                                LocalIndex, DependencyOS, Errs))
           HadErrors = true;
       } else {
-        auto MaybeTUDeps = WorkerTools[I]->getTranslationUnitDependencies(
+        auto MaybeTUDeps = WorkerTool.getTranslationUnitDependencies(
             Input->CommandLine, CWD, AlreadySeenModules, LookupOutput);
         if (handleTranslationUnitResult(Filename, MaybeTUDeps, *FD, LocalIndex,
                                         DependencyOS, Errs))
@@ -982,13 +980,14 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   llvm::Timer T;
   T.startTimer();
 
-  if (NumThreads == 1) {
-    WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
-    ScanningTask(0);
+  if (Inputs.size() == 1) {
+    DependencyScanningTool WorkerTool(Service);
+    ScanningTask(WorkerTool);
   } else {
     llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
+    std::vector<DependencyScanningTool> WorkerTools;
     for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
-      WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
+      WorkerTools.emplace_back(Service);
 
     if (Verbose) {
       llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
@@ -997,7 +996,9 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
     }
 
     for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
-      Pool.async([I, ScanningTask]() { ScanningTask(I); });
+      Pool.async([ScanningTask, &WorkerTools, I]() {
+        ScanningTask(WorkerTools[I]);
+      });
     }
     Pool.wait();
   }



More information about the cfe-commits mailing list