[clang] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use thread pool in P1689 per file mode (PR #84285)

Chuanqi Xu via cfe-commits cfe-commits at lists.llvm.org
Mon Mar 11 19:13:20 PDT 2024


https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/84285

>From 48b3261e1d217b7ce78180314a222dca4d6aba18 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Thu, 7 Mar 2024 15:19:28 +0800
Subject: [PATCH 1/5] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use
 thread pool in P1689 per file mode

I suddenly found that the clang scan deps may use all concurrent threads
to scan the files. It makes sense in the batch mode. But in P1689
per file mode, it simply wastes times.
---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 204 +++++++++---------
 1 file changed, 108 insertions(+), 96 deletions(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index d042fecc3dbe63..843816a8ed6515 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -744,6 +744,9 @@ getCompilationDataBase(int argc, char **argv, std::string &ErrorMessage) {
     return nullptr;
   }
 
+  // Only 1 threads is required if P1689 per file mode.
+  NumThreads = 1;
+
   // There might be multiple jobs for a compilation. Extract the specified
   // output filename from the last job.
   auto LastCmd = C->getJobs().end();
@@ -867,13 +870,6 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
 
-  DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
-                                    EagerLoadModules);
-  llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
-  std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
-  for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
-    WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
-
   std::vector<tooling::CompileCommand> Inputs =
       AdjustingCompilations->getAllCompileCommands();
 
@@ -893,102 +889,118 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   if (Format == ScanningOutputFormat::Full)
     FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
 
-  if (Verbose) {
-    llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
-                 << " files using " << Pool.getMaxConcurrency() << " workers\n";
-  }
-
-  llvm::Timer T;
-  T.startTimer();
-
-  for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
-    Pool.async([&, I]() {
-      llvm::DenseSet<ModuleID> AlreadySeenModules;
-      while (auto MaybeInputIndex = GetNextInputIndex()) {
-        size_t LocalIndex = *MaybeInputIndex;
-        const tooling::CompileCommand *Input = &Inputs[LocalIndex];
-        std::string Filename = std::move(Input->Filename);
-        std::string CWD = std::move(Input->Directory);
-
-        std::optional<StringRef> MaybeModuleName;
-        if (!ModuleName.empty())
-          MaybeModuleName = ModuleName;
-
-        std::string OutputDir(ModuleFilesDir);
-        if (OutputDir.empty())
-          OutputDir = getModuleCachePath(Input->CommandLine);
-        auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
-          return ::lookupModuleOutput(MID, MOK, OutputDir);
-        };
-
-        // Run the tool on it.
-        if (Format == ScanningOutputFormat::Make) {
-          auto MaybeFile =
-              WorkerTools[I]->getDependencyFile(Input->CommandLine, CWD);
-          if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
-                                             Errs))
-            HadErrors = true;
-        } else if (Format == ScanningOutputFormat::P1689) {
-          // It is useful to generate the make-format dependency output during
-          // the scanning for P1689. Otherwise the users need to scan again for
-          // it. We will generate the make-format dependency output if we find
-          // `-MF` in the command lines.
-          std::string MakeformatOutputPath;
-          std::string MakeformatOutput;
-
-          auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile(
-              *Input, CWD, MakeformatOutput, MakeformatOutputPath);
-
-          if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
-            HadErrors = true;
+  std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
 
-          if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() &&
-              !HadErrors) {
-            static std::mutex Lock;
-            // With compilation database, we may open different files
-            // concurrently or we may write the same file concurrently. So we
-            // use a map here to allow multiple compile commands to write to the
-            // same file. Also we need a lock here to avoid data race.
-            static llvm::StringMap<llvm::raw_fd_ostream> OSs;
-            std::unique_lock<std::mutex> LockGuard(Lock);
-
-            auto OSIter = OSs.find(MakeformatOutputPath);
-            if (OSIter == OSs.end()) {
-              std::error_code EC;
-              OSIter = OSs.try_emplace(MakeformatOutputPath,
-                                       MakeformatOutputPath, EC)
-                           .first;
-              if (EC)
-                llvm::errs()
-                    << "Failed to open P1689 make format output file \""
-                    << MakeformatOutputPath << "\" for " << EC.message()
-                    << "\n";
-            }
+  auto ScanningTask = [&](unsigned I) {
+    llvm::DenseSet<ModuleID> AlreadySeenModules;
+    while (auto MaybeInputIndex = GetNextInputIndex()) {
+      size_t LocalIndex = *MaybeInputIndex;
+      const tooling::CompileCommand *Input = &Inputs[LocalIndex];
+      std::string Filename = std::move(Input->Filename);
+      std::string CWD = std::move(Input->Directory);
+
+      std::optional<StringRef> MaybeModuleName;
+      if (!ModuleName.empty())
+        MaybeModuleName = ModuleName;
+
+      std::string OutputDir(ModuleFilesDir);
+      if (OutputDir.empty())
+        OutputDir = getModuleCachePath(Input->CommandLine);
+      auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
+        return ::lookupModuleOutput(MID, MOK, OutputDir);
+      };
 
-            SharedStream MakeformatOS(OSIter->second);
-            llvm::Expected<std::string> MaybeOutput(MakeformatOutput);
-            if (handleMakeDependencyToolResult(Filename, MaybeOutput,
-                                               MakeformatOS, Errs))
-              HadErrors = true;
+      // Run the tool on it.
+      if (Format == ScanningOutputFormat::Make) {
+        auto MaybeFile =
+            WorkerTools[I]->getDependencyFile(Input->CommandLine, CWD);
+        if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
+                                           Errs))
+          HadErrors = true;
+      } else if (Format == ScanningOutputFormat::P1689) {
+        // It is useful to generate the make-format dependency output during
+        // the scanning for P1689. Otherwise the users need to scan again for
+        // it. We will generate the make-format dependency output if we find
+        // `-MF` in the command lines.
+        std::string MakeformatOutputPath;
+        std::string MakeformatOutput;
+
+        auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile(
+            *Input, CWD, MakeformatOutput, MakeformatOutputPath);
+
+        if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
+          HadErrors = true;
+
+        if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() &&
+            !HadErrors) {
+          static std::mutex Lock;
+          // With compilation database, we may open different files
+          // concurrently or we may write the same file concurrently. So we
+          // use a map here to allow multiple compile commands to write to the
+          // same file. Also we need a lock here to avoid data race.
+          static llvm::StringMap<llvm::raw_fd_ostream> OSs;
+          std::unique_lock<std::mutex> LockGuard(Lock);
+
+          auto OSIter = OSs.find(MakeformatOutputPath);
+          if (OSIter == OSs.end()) {
+            std::error_code EC;
+            OSIter =
+                OSs.try_emplace(MakeformatOutputPath, MakeformatOutputPath, EC)
+                    .first;
+            if (EC)
+              llvm::errs() << "Failed to open P1689 make format output file \""
+                           << MakeformatOutputPath << "\" for " << EC.message()
+                           << "\n";
           }
-        } else if (MaybeModuleName) {
-          auto MaybeModuleDepsGraph = WorkerTools[I]->getModuleDependencies(
-              *MaybeModuleName, Input->CommandLine, CWD, AlreadySeenModules,
-              LookupOutput);
-          if (handleModuleResult(*MaybeModuleName, MaybeModuleDepsGraph, *FD,
-                                 LocalIndex, DependencyOS, Errs))
-            HadErrors = true;
-        } else {
-          auto MaybeTUDeps = WorkerTools[I]->getTranslationUnitDependencies(
-              Input->CommandLine, CWD, AlreadySeenModules, LookupOutput);
-          if (handleTranslationUnitResult(Filename, MaybeTUDeps, *FD,
-                                          LocalIndex, DependencyOS, Errs))
+
+          SharedStream MakeformatOS(OSIter->second);
+          llvm::Expected<std::string> MaybeOutput(MakeformatOutput);
+          if (handleMakeDependencyToolResult(Filename, MaybeOutput,
+                                             MakeformatOS, Errs))
             HadErrors = true;
         }
+      } else if (MaybeModuleName) {
+        auto MaybeModuleDepsGraph = WorkerTools[I]->getModuleDependencies(
+            *MaybeModuleName, Input->CommandLine, CWD, AlreadySeenModules,
+            LookupOutput);
+        if (handleModuleResult(*MaybeModuleName, MaybeModuleDepsGraph, *FD,
+                               LocalIndex, DependencyOS, Errs))
+          HadErrors = true;
+      } else {
+        auto MaybeTUDeps = WorkerTools[I]->getTranslationUnitDependencies(
+            Input->CommandLine, CWD, AlreadySeenModules, LookupOutput);
+        if (handleTranslationUnitResult(Filename, MaybeTUDeps, *FD, LocalIndex,
+                                        DependencyOS, Errs))
+          HadErrors = true;
       }
-    });
+    }
+  };
+
+  DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
+                                    EagerLoadModules);
+
+  llvm::Timer T;
+  T.startTimer();
+
+  if (NumThreads == 1) {
+    WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
+    ScanningTask(0);
+  } else {
+    llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
+    for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
+      WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
+
+    if (Verbose) {
+      llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
+                   << " files using " << Pool.getMaxConcurrency()
+                   << " workers\n";
+    }
+
+    for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
+      Pool.async([I, ScanningTask]() { ScanningTask(I); });
+    }
+    Pool.wait();
   }
-  Pool.wait();
 
   T.stopTimer();
   if (PrintTiming)

>From a2c194c49344371209606617d18008faf89ee0a5 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Fri, 8 Mar 2024 10:47:30 +0800
Subject: [PATCH 2/5] Address comments

---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 25 ++++++++++---------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 843816a8ed6515..2fd85c8e651c75 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -889,9 +889,7 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   if (Format == ScanningOutputFormat::Full)
     FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
 
-  std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
-
-  auto ScanningTask = [&](unsigned I) {
+  auto ScanningTask = [&](DependencyScanningTool &WorkerTool) {
     llvm::DenseSet<ModuleID> AlreadySeenModules;
     while (auto MaybeInputIndex = GetNextInputIndex()) {
       size_t LocalIndex = *MaybeInputIndex;
@@ -913,7 +911,7 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
       // Run the tool on it.
       if (Format == ScanningOutputFormat::Make) {
         auto MaybeFile =
-            WorkerTools[I]->getDependencyFile(Input->CommandLine, CWD);
+            WorkerTool.getDependencyFile(Input->CommandLine, CWD);
         if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
                                            Errs))
           HadErrors = true;
@@ -925,7 +923,7 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
         std::string MakeformatOutputPath;
         std::string MakeformatOutput;
 
-        auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile(
+        auto MaybeRule = WorkerTool.getP1689ModuleDependencyFile(
             *Input, CWD, MakeformatOutput, MakeformatOutputPath);
 
         if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
@@ -960,14 +958,14 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
             HadErrors = true;
         }
       } else if (MaybeModuleName) {
-        auto MaybeModuleDepsGraph = WorkerTools[I]->getModuleDependencies(
+        auto MaybeModuleDepsGraph = WorkerTool.getModuleDependencies(
             *MaybeModuleName, Input->CommandLine, CWD, AlreadySeenModules,
             LookupOutput);
         if (handleModuleResult(*MaybeModuleName, MaybeModuleDepsGraph, *FD,
                                LocalIndex, DependencyOS, Errs))
           HadErrors = true;
       } else {
-        auto MaybeTUDeps = WorkerTools[I]->getTranslationUnitDependencies(
+        auto MaybeTUDeps = WorkerTool.getTranslationUnitDependencies(
             Input->CommandLine, CWD, AlreadySeenModules, LookupOutput);
         if (handleTranslationUnitResult(Filename, MaybeTUDeps, *FD, LocalIndex,
                                         DependencyOS, Errs))
@@ -982,13 +980,14 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   llvm::Timer T;
   T.startTimer();
 
-  if (NumThreads == 1) {
-    WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
-    ScanningTask(0);
+  if (Inputs.size() == 1) {
+    DependencyScanningTool WorkerTool(Service);
+    ScanningTask(WorkerTool);
   } else {
     llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
+    std::vector<DependencyScanningTool> WorkerTools;
     for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
-      WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
+      WorkerTools.emplace_back(Service);
 
     if (Verbose) {
       llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
@@ -997,7 +996,9 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
     }
 
     for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
-      Pool.async([I, ScanningTask]() { ScanningTask(I); });
+      Pool.async([ScanningTask, &WorkerTools, I]() {
+        ScanningTask(WorkerTools[I]);
+      });
     }
     Pool.wait();
   }

>From 9014bba6f4dc4656403763ae825ea318c3d84aae Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Fri, 8 Mar 2024 10:51:11 +0800
Subject: [PATCH 3/5] fmt

---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 2fd85c8e651c75..832be0df649d1c 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -910,8 +910,7 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
 
       // Run the tool on it.
       if (Format == ScanningOutputFormat::Make) {
-        auto MaybeFile =
-            WorkerTool.getDependencyFile(Input->CommandLine, CWD);
+        auto MaybeFile = WorkerTool.getDependencyFile(Input->CommandLine, CWD);
         if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
                                            Errs))
           HadErrors = true;
@@ -996,9 +995,8 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
     }
 
     for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
-      Pool.async([ScanningTask, &WorkerTools, I]() {
-        ScanningTask(WorkerTools[I]);
-      });
+      Pool.async(
+          [ScanningTask, &WorkerTools, I]() { ScanningTask(WorkerTools[I]); });
     }
     Pool.wait();
   }

>From a6a501f69637cba0ae494b34f9d57c2e0905a40a Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Tue, 12 Mar 2024 09:39:56 +0800
Subject: [PATCH 4/5] Remove NumThreads=1

---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 832be0df649d1c..8df86f5a6e96ca 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -744,9 +744,6 @@ getCompilationDataBase(int argc, char **argv, std::string &ErrorMessage) {
     return nullptr;
   }
 
-  // Only 1 threads is required if P1689 per file mode.
-  NumThreads = 1;
-
   // There might be multiple jobs for a compilation. Extract the specified
   // output filename from the last job.
   auto LastCmd = C->getJobs().end();

>From 127426ca23055f4e193c87151f957a9706fb57b2 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Tue, 12 Mar 2024 10:12:59 +0800
Subject: [PATCH 5/5] fmt

---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 8df86f5a6e96ca..eaa76dd43e41dd 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -886,7 +886,9 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   if (Format == ScanningOutputFormat::Full)
     FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
 
-  auto ScanningTask = [&](DependencyScanningTool &WorkerTool) {
+  auto ScanningTask = [&](DependencyScanningService &Service) {
+    DependencyScanningTool WorkerTool(Service);
+
     llvm::DenseSet<ModuleID> AlreadySeenModules;
     while (auto MaybeInputIndex = GetNextInputIndex()) {
       size_t LocalIndex = *MaybeInputIndex;
@@ -977,13 +979,9 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   T.startTimer();
 
   if (Inputs.size() == 1) {
-    DependencyScanningTool WorkerTool(Service);
-    ScanningTask(WorkerTool);
+    ScanningTask(Service);
   } else {
     llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
-    std::vector<DependencyScanningTool> WorkerTools;
-    for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
-      WorkerTools.emplace_back(Service);
 
     if (Verbose) {
       llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
@@ -991,10 +989,9 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
                    << " workers\n";
     }
 
-    for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
-      Pool.async(
-          [ScanningTask, &WorkerTools, I]() { ScanningTask(WorkerTools[I]); });
-    }
+    for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
+      Pool.async([ScanningTask, &Service]() { ScanningTask(Service); });
+
     Pool.wait();
   }
 



More information about the cfe-commits mailing list