[clang] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use thread pool in P1689 per file mode (PR #84285)

Chuanqi Xu via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 7 00:03:57 PST 2024


https://github.com/ChuanqiXu9 created https://github.com/llvm/llvm-project/pull/84285


I suddenly found that the clang scan deps may use all concurrent threads to scan the files. It makes sense in the batch mode. But in P1689 per file mode, it simply wastes times and resources.

This patch itself should be a NFC patch. It simply moves codes.

>From 8f3060da8059dc300152a1a1bc297cffcba51a61 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Thu, 7 Mar 2024 15:19:28 +0800
Subject: [PATCH] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use thread
 pool in P1689 per file mode

I suddenly found that the clang scan deps may use all concurrent threads
to scan the files. It makes sense in the batch mode. But in P1689
per file mode, it simply wastes times.
---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 204 +++++++++---------
 1 file changed, 108 insertions(+), 96 deletions(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index d042fecc3dbe63..843816a8ed6515 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -744,6 +744,9 @@ getCompilationDataBase(int argc, char **argv, std::string &ErrorMessage) {
     return nullptr;
   }
 
+  // Only 1 threads is required if P1689 per file mode.
+  NumThreads = 1;
+
   // There might be multiple jobs for a compilation. Extract the specified
   // output filename from the last job.
   auto LastCmd = C->getJobs().end();
@@ -867,13 +870,6 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
 
-  DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
-                                    EagerLoadModules);
-  llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
-  std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
-  for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
-    WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
-
   std::vector<tooling::CompileCommand> Inputs =
       AdjustingCompilations->getAllCompileCommands();
 
@@ -893,102 +889,118 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   if (Format == ScanningOutputFormat::Full)
     FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
 
-  if (Verbose) {
-    llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
-                 << " files using " << Pool.getMaxConcurrency() << " workers\n";
-  }
-
-  llvm::Timer T;
-  T.startTimer();
-
-  for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
-    Pool.async([&, I]() {
-      llvm::DenseSet<ModuleID> AlreadySeenModules;
-      while (auto MaybeInputIndex = GetNextInputIndex()) {
-        size_t LocalIndex = *MaybeInputIndex;
-        const tooling::CompileCommand *Input = &Inputs[LocalIndex];
-        std::string Filename = std::move(Input->Filename);
-        std::string CWD = std::move(Input->Directory);
-
-        std::optional<StringRef> MaybeModuleName;
-        if (!ModuleName.empty())
-          MaybeModuleName = ModuleName;
-
-        std::string OutputDir(ModuleFilesDir);
-        if (OutputDir.empty())
-          OutputDir = getModuleCachePath(Input->CommandLine);
-        auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
-          return ::lookupModuleOutput(MID, MOK, OutputDir);
-        };
-
-        // Run the tool on it.
-        if (Format == ScanningOutputFormat::Make) {
-          auto MaybeFile =
-              WorkerTools[I]->getDependencyFile(Input->CommandLine, CWD);
-          if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
-                                             Errs))
-            HadErrors = true;
-        } else if (Format == ScanningOutputFormat::P1689) {
-          // It is useful to generate the make-format dependency output during
-          // the scanning for P1689. Otherwise the users need to scan again for
-          // it. We will generate the make-format dependency output if we find
-          // `-MF` in the command lines.
-          std::string MakeformatOutputPath;
-          std::string MakeformatOutput;
-
-          auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile(
-              *Input, CWD, MakeformatOutput, MakeformatOutputPath);
-
-          if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
-            HadErrors = true;
+  std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
 
-          if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() &&
-              !HadErrors) {
-            static std::mutex Lock;
-            // With compilation database, we may open different files
-            // concurrently or we may write the same file concurrently. So we
-            // use a map here to allow multiple compile commands to write to the
-            // same file. Also we need a lock here to avoid data race.
-            static llvm::StringMap<llvm::raw_fd_ostream> OSs;
-            std::unique_lock<std::mutex> LockGuard(Lock);
-
-            auto OSIter = OSs.find(MakeformatOutputPath);
-            if (OSIter == OSs.end()) {
-              std::error_code EC;
-              OSIter = OSs.try_emplace(MakeformatOutputPath,
-                                       MakeformatOutputPath, EC)
-                           .first;
-              if (EC)
-                llvm::errs()
-                    << "Failed to open P1689 make format output file \""
-                    << MakeformatOutputPath << "\" for " << EC.message()
-                    << "\n";
-            }
+  auto ScanningTask = [&](unsigned I) {
+    llvm::DenseSet<ModuleID> AlreadySeenModules;
+    while (auto MaybeInputIndex = GetNextInputIndex()) {
+      size_t LocalIndex = *MaybeInputIndex;
+      const tooling::CompileCommand *Input = &Inputs[LocalIndex];
+      std::string Filename = std::move(Input->Filename);
+      std::string CWD = std::move(Input->Directory);
+
+      std::optional<StringRef> MaybeModuleName;
+      if (!ModuleName.empty())
+        MaybeModuleName = ModuleName;
+
+      std::string OutputDir(ModuleFilesDir);
+      if (OutputDir.empty())
+        OutputDir = getModuleCachePath(Input->CommandLine);
+      auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
+        return ::lookupModuleOutput(MID, MOK, OutputDir);
+      };
 
-            SharedStream MakeformatOS(OSIter->second);
-            llvm::Expected<std::string> MaybeOutput(MakeformatOutput);
-            if (handleMakeDependencyToolResult(Filename, MaybeOutput,
-                                               MakeformatOS, Errs))
-              HadErrors = true;
+      // Run the tool on it.
+      if (Format == ScanningOutputFormat::Make) {
+        auto MaybeFile =
+            WorkerTools[I]->getDependencyFile(Input->CommandLine, CWD);
+        if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
+                                           Errs))
+          HadErrors = true;
+      } else if (Format == ScanningOutputFormat::P1689) {
+        // It is useful to generate the make-format dependency output during
+        // the scanning for P1689. Otherwise the users need to scan again for
+        // it. We will generate the make-format dependency output if we find
+        // `-MF` in the command lines.
+        std::string MakeformatOutputPath;
+        std::string MakeformatOutput;
+
+        auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile(
+            *Input, CWD, MakeformatOutput, MakeformatOutputPath);
+
+        if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
+          HadErrors = true;
+
+        if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() &&
+            !HadErrors) {
+          static std::mutex Lock;
+          // With compilation database, we may open different files
+          // concurrently or we may write the same file concurrently. So we
+          // use a map here to allow multiple compile commands to write to the
+          // same file. Also we need a lock here to avoid data race.
+          static llvm::StringMap<llvm::raw_fd_ostream> OSs;
+          std::unique_lock<std::mutex> LockGuard(Lock);
+
+          auto OSIter = OSs.find(MakeformatOutputPath);
+          if (OSIter == OSs.end()) {
+            std::error_code EC;
+            OSIter =
+                OSs.try_emplace(MakeformatOutputPath, MakeformatOutputPath, EC)
+                    .first;
+            if (EC)
+              llvm::errs() << "Failed to open P1689 make format output file \""
+                           << MakeformatOutputPath << "\" for " << EC.message()
+                           << "\n";
           }
-        } else if (MaybeModuleName) {
-          auto MaybeModuleDepsGraph = WorkerTools[I]->getModuleDependencies(
-              *MaybeModuleName, Input->CommandLine, CWD, AlreadySeenModules,
-              LookupOutput);
-          if (handleModuleResult(*MaybeModuleName, MaybeModuleDepsGraph, *FD,
-                                 LocalIndex, DependencyOS, Errs))
-            HadErrors = true;
-        } else {
-          auto MaybeTUDeps = WorkerTools[I]->getTranslationUnitDependencies(
-              Input->CommandLine, CWD, AlreadySeenModules, LookupOutput);
-          if (handleTranslationUnitResult(Filename, MaybeTUDeps, *FD,
-                                          LocalIndex, DependencyOS, Errs))
+
+          SharedStream MakeformatOS(OSIter->second);
+          llvm::Expected<std::string> MaybeOutput(MakeformatOutput);
+          if (handleMakeDependencyToolResult(Filename, MaybeOutput,
+                                             MakeformatOS, Errs))
             HadErrors = true;
         }
+      } else if (MaybeModuleName) {
+        auto MaybeModuleDepsGraph = WorkerTools[I]->getModuleDependencies(
+            *MaybeModuleName, Input->CommandLine, CWD, AlreadySeenModules,
+            LookupOutput);
+        if (handleModuleResult(*MaybeModuleName, MaybeModuleDepsGraph, *FD,
+                               LocalIndex, DependencyOS, Errs))
+          HadErrors = true;
+      } else {
+        auto MaybeTUDeps = WorkerTools[I]->getTranslationUnitDependencies(
+            Input->CommandLine, CWD, AlreadySeenModules, LookupOutput);
+        if (handleTranslationUnitResult(Filename, MaybeTUDeps, *FD, LocalIndex,
+                                        DependencyOS, Errs))
+          HadErrors = true;
       }
-    });
+    }
+  };
+
+  DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
+                                    EagerLoadModules);
+
+  llvm::Timer T;
+  T.startTimer();
+
+  if (NumThreads == 1) {
+    WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
+    ScanningTask(0);
+  } else {
+    llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
+    for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
+      WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
+
+    if (Verbose) {
+      llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
+                   << " files using " << Pool.getMaxConcurrency()
+                   << " workers\n";
+    }
+
+    for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
+      Pool.async([I, ScanningTask]() { ScanningTask(I); });
+    }
+    Pool.wait();
   }
-  Pool.wait();
 
   T.stopTimer();
   if (PrintTiming)



More information about the cfe-commits mailing list