[clang] 4368616 - [clang][Dependency Scanning] Refactor Scanning Compiler Instance Initialization (#161300)

via cfe-commits cfe-commits at lists.llvm.org
Fri Oct 3 16:26:46 PDT 2025


Author: Qiongsi Wu
Date: 2025-10-03T16:26:42-07:00
New Revision: 4368616452476472e3d776c9ae72be34fa674146

URL: https://github.com/llvm/llvm-project/commit/4368616452476472e3d776c9ae72be34fa674146
DIFF: https://github.com/llvm/llvm-project/commit/4368616452476472e3d776c9ae72be34fa674146.diff

LOG: [clang][Dependency Scanning] Refactor Scanning Compiler Instance Initialization (#161300)

This PR follows https://github.com/llvm/llvm-project/pull/160795, and it
is the second of a series of planned PRs to land
https://github.com/llvm/llvm-project/pull/160207 in smaller pieces.

The initialization steps before and within
`DependencyScanningAction::runInvocation` are broken up in to several
helper functions. The intention is to reuse the helper functions in a
followup PR to initialize the `CompilerInstanceWithContext`.

Part of work for rdar://136303612.

Added: 
    

Modified: 
    clang/lib/Tooling/DependencyScanning/CMakeLists.txt
    clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
    clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h
    clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt
index 53a2728bd5786..76bdc50097fff 100644
--- a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt
+++ b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt
@@ -24,6 +24,5 @@ add_clang_library(clangDependencyScanning
   clangFrontend
   clangLex
   clangSerialization
-  clangTooling
   ${LLVM_PTHREAD_LIB}
   )

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
index 010380dee3617..e1f4d0d361185 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
@@ -9,8 +9,10 @@
 #include "DependencyScannerImpl.h"
 #include "clang/Basic/DiagnosticFrontend.h"
 #include "clang/Basic/DiagnosticSerialization.h"
+#include "clang/Driver/Driver.h"
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
+#include "llvm/TargetParser/Host.h"
 
 using namespace clang;
 using namespace tooling;
@@ -332,11 +334,9 @@ class ScanningDependencyDirectivesGetter : public DependencyDirectivesGetter {
     return DepFS->getDirectiveTokens(File.getName());
   }
 };
-} // namespace
 
 /// Sanitize diagnostic options for dependency scan.
-void clang::tooling::dependencies::sanitizeDiagOpts(
-    DiagnosticOptions &DiagOpts) {
+void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
   // Don't print 'X warnings and Y errors generated'.
   DiagOpts.ShowCarets = false;
   // Don't write out diagnostic file.
@@ -355,44 +355,146 @@ void clang::tooling::dependencies::sanitizeDiagOpts(
         .Default(true);
   });
 }
+} // namespace
 
-bool DependencyScanningAction::runInvocation(
-    std::shared_ptr<CompilerInvocation> Invocation,
-    IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
-    std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-    DiagnosticConsumer *DiagConsumer) {
-  // Making sure that we canonicalize the defines before we create the deep
-  // copy to avoid unnecessary variants in the scanner and in the resulting
-  // explicit command lines.
-  if (any(Service.getOptimizeArgs() & ScanningOptimizations::Macros))
-    canonicalizeDefines(Invocation->getPreprocessorOpts());
+namespace clang::tooling::dependencies {
+std::unique_ptr<DiagnosticOptions>
+createDiagOptions(ArrayRef<std::string> CommandLine) {
+  std::vector<const char *> CLI;
+  for (const std::string &Arg : CommandLine)
+    CLI.push_back(Arg.c_str());
+  auto DiagOpts = CreateAndPopulateDiagOpts(CLI);
+  sanitizeDiagOpts(*DiagOpts);
+  return DiagOpts;
+}
 
-  // Make a deep copy of the original Clang invocation.
-  CompilerInvocation OriginalInvocation(*Invocation);
+DignosticsEngineWithDiagOpts::DignosticsEngineWithDiagOpts(
+    ArrayRef<std::string> CommandLine,
+    IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, DiagnosticConsumer &DC) {
+  std::vector<const char *> CCommandLine(CommandLine.size(), nullptr);
+  llvm::transform(CommandLine, CCommandLine.begin(),
+                  [](const std::string &Str) { return Str.c_str(); });
+  DiagOpts = CreateAndPopulateDiagOpts(CCommandLine);
+  sanitizeDiagOpts(*DiagOpts);
+  DiagEngine = CompilerInstance::createDiagnostics(*FS, *DiagOpts, &DC,
+                                                   /*ShouldOwnClient=*/false);
+}
 
-  if (Scanned) {
-    // Scanning runs once for the first -cc1 invocation in a chain of driver
-    // jobs. For any dependent jobs, reuse the scanning result and just
-    // update the LastCC1Arguments to correspond to the new invocation.
-    // FIXME: to support multi-arch builds, each arch requires a separate scan
-    setLastCC1Arguments(std::move(OriginalInvocation));
-    return true;
+std::pair<std::unique_ptr<driver::Driver>, std::unique_ptr<driver::Compilation>>
+buildCompilation(ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags,
+                 IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) {
+  SmallVector<const char *, 256> Argv;
+  Argv.reserve(ArgStrs.size());
+  for (const std::string &Arg : ArgStrs)
+    Argv.push_back(Arg.c_str());
+
+  std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>(
+      Argv[0], llvm::sys::getDefaultTargetTriple(), Diags,
+      "clang LLVM compiler", FS);
+  Driver->setTitle("clang_based_tool");
+
+  llvm::BumpPtrAllocator Alloc;
+  bool CLMode = driver::IsClangCL(
+      driver::getDriverMode(Argv[0], ArrayRef(Argv).slice(1)));
+
+  if (llvm::Error E =
+          driver::expandResponseFiles(Argv, CLMode, Alloc, FS.get())) {
+    Diags.Report(diag::err_drv_expand_response_file)
+        << llvm::toString(std::move(E));
+    return std::make_pair(nullptr, nullptr);
   }
 
-  Scanned = true;
+  std::unique_ptr<driver::Compilation> Compilation(
+      Driver->BuildCompilation(Argv));
+  if (!Compilation)
+    return std::make_pair(nullptr, nullptr);
 
-  // Create a compiler instance to handle the actual work.
-  auto ModCache = makeInProcessModuleCache(Service.getModuleCacheEntries());
-  ScanInstanceStorage.emplace(std::move(Invocation), std::move(PCHContainerOps),
-                              ModCache.get());
-  CompilerInstance &ScanInstance = *ScanInstanceStorage;
+  if (Compilation->containsError())
+    return std::make_pair(nullptr, nullptr);
+
+  return std::make_pair(std::move(Driver), std::move(Compilation));
+}
+
+std::unique_ptr<CompilerInvocation>
+createCompilerInvocation(ArrayRef<std::string> CommandLine,
+                         DiagnosticsEngine &Diags) {
+  llvm::opt::ArgStringList Argv;
+  for (const std::string &Str : ArrayRef(CommandLine).drop_front())
+    Argv.push_back(Str.c_str());
+
+  auto Invocation = std::make_unique<CompilerInvocation>();
+  if (!CompilerInvocation::CreateFromArgs(*Invocation, Argv, Diags)) {
+    // FIXME: Should we just go on like cc1_main does?
+    return nullptr;
+  }
+  return Invocation;
+}
+
+std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>>
+initVFSForTUBuferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
+                          ArrayRef<std::string> CommandLine,
+                          StringRef WorkingDirectory,
+                          llvm::MemoryBufferRef TUBuffer) {
+  // Reset what might have been modified in the previous worker invocation.
+  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+
+  IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS;
+  auto OverlayFS =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
+  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
+  auto InputPath = TUBuffer.getBufferIdentifier();
+  InMemoryFS->addFile(
+      InputPath, 0, llvm::MemoryBuffer::getMemBufferCopy(TUBuffer.getBuffer()));
+  IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
+
+  OverlayFS->pushOverlay(InMemoryOverlay);
+  ModifiedFS = OverlayFS;
+  std::vector<std::string> ModifiedCommandLine(CommandLine);
+  ModifiedCommandLine.emplace_back(InputPath);
+
+  return std::make_pair(ModifiedFS, ModifiedCommandLine);
+}
+
+std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>>
+initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
+                         ArrayRef<std::string> CommandLine,
+                         StringRef WorkingDirectory, StringRef ModuleName) {
+  // Reset what might have been modified in the previous worker invocation.
+  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+
+  // If we're scanning based on a module name alone, we don't expect the client
+  // to provide us with an input file. However, the driver really wants to have
+  // one. Let's just make it up to make the driver happy.
+  auto OverlayFS =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
+  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
+  SmallString<128> FakeInputPath;
+  // TODO: We should retry the creation if the path already exists.
+  llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath,
+                                  /*MakeAbsolute=*/false);
+  InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
+  IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
+  OverlayFS->pushOverlay(InMemoryOverlay);
+
+  std::vector<std::string> ModifiedCommandLine(CommandLine);
+  ModifiedCommandLine.emplace_back(FakeInputPath);
+
+  return std::make_pair(OverlayFS, ModifiedCommandLine);
+}
+
+bool initializeScanCompilerInstance(
+    CompilerInstance &ScanInstance,
+    IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+    DiagnosticConsumer *DiagConsumer, DependencyScanningService &Service,
+    IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS) {
   ScanInstance.setBuildingModule(false);
 
   ScanInstance.createVirtualFileSystem(FS, DiagConsumer);
 
   // Create the compiler's actual diagnostics engine.
   sanitizeDiagOpts(ScanInstance.getDiagnosticOpts());
-  assert(!DiagConsumerFinished && "attempt to reuse finished consumer");
   ScanInstance.createDiagnostics(DiagConsumer, /*ShouldOwnClient=*/false);
   if (!ScanInstance.hasDiagnostics())
     return false;
@@ -435,6 +537,26 @@ bool DependencyScanningAction::runInvocation(
 
   ScanInstance.createSourceManager();
 
+  // Consider 
diff erent header search and diagnostic options to create
+  // 
diff erent modules. This avoids the unsound aliasing of module PCMs.
+  //
+  // TODO: Implement diagnostic bucketing to reduce the impact of strict
+  // context hashing.
+  ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true;
+  ScanInstance.getHeaderSearchOpts().ModulesSerializeOnlyPreprocessor = true;
+  ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
+  ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
+  ScanInstance.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true;
+  ScanInstance.getHeaderSearchOpts().ModulesForceValidateUserHeaders = false;
+
+  // Avoid some checks and module map parsing when loading PCM files.
+  ScanInstance.getPreprocessorOpts().ModulesCheckRelocated = false;
+
+  return true;
+}
+
+llvm::SmallVector<StringRef>
+getInitialStableDirs(const CompilerInstance &ScanInstance) {
   // Create a collection of stable directories derived from the ScanInstance
   // for determining whether module dependencies would fully resolve from
   // those directories.
@@ -442,7 +564,12 @@ bool DependencyScanningAction::runInvocation(
   const StringRef Sysroot = ScanInstance.getHeaderSearchOpts().Sysroot;
   if (!Sysroot.empty() && (llvm::sys::path::root_directory(Sysroot) != Sysroot))
     StableDirs = {Sysroot, ScanInstance.getHeaderSearchOpts().ResourceDir};
+  return StableDirs;
+}
 
+std::optional<PrebuiltModulesAttrsMap>
+computePrebuiltModulesASTMap(CompilerInstance &ScanInstance,
+                             llvm::SmallVector<StringRef> &StableDirs) {
   // Store a mapping of prebuilt module files and their properties like header
   // search options. This will prevent the implicit build to create duplicate
   // modules and will force reuse of the existing prebuilt module files
@@ -454,12 +581,14 @@ bool DependencyScanningAction::runInvocation(
             ScanInstance.getPreprocessorOpts().ImplicitPCHInclude, ScanInstance,
             ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles,
             PrebuiltModulesASTMap, ScanInstance.getDiagnostics(), StableDirs))
-      return false;
+      return {};
 
-  // Create the dependency collector that will collect the produced
-  // dependencies.
-  //
-  // This also moves the existing dependency output options from the
+  return PrebuiltModulesASTMap;
+}
+
+std::unique_ptr<DependencyOutputOptions>
+takeDependencyOutputOptionsFrom(CompilerInstance &ScanInstance) {
+  // This function moves the existing dependency output options from the
   // invocation to the collector. The options in the invocation are reset,
   // which ensures that the compiler won't create new dependency collectors,
   // and thus won't write out the extra '.d' files to disk.
@@ -472,35 +601,85 @@ bool DependencyScanningAction::runInvocation(
                                      ScanInstance.getFrontendOpts().Inputs)};
   Opts->IncludeSystemHeaders = true;
 
+  return Opts;
+}
+
+std::shared_ptr<ModuleDepCollector> initializeScanInstanceDependencyCollector(
+    CompilerInstance &ScanInstance,
+    std::unique_ptr<DependencyOutputOptions> DepOutputOpts,
+    StringRef WorkingDirectory, DependencyConsumer &Consumer,
+    DependencyScanningService &Service, CompilerInvocation &Inv,
+    DependencyActionController &Controller,
+    PrebuiltModulesAttrsMap PrebuiltModulesASTMap,
+    llvm::SmallVector<StringRef> &StableDirs) {
+  std::shared_ptr<ModuleDepCollector> MDC;
   switch (Service.getFormat()) {
   case ScanningOutputFormat::Make:
     ScanInstance.addDependencyCollector(
         std::make_shared<DependencyConsumerForwarder>(
-            std::move(Opts), WorkingDirectory, Consumer));
+            std::move(DepOutputOpts), WorkingDirectory, Consumer));
     break;
   case ScanningOutputFormat::P1689:
   case ScanningOutputFormat::Full:
     MDC = std::make_shared<ModuleDepCollector>(
-        Service, std::move(Opts), ScanInstance, Consumer, Controller,
-        OriginalInvocation, std::move(PrebuiltModulesASTMap), StableDirs);
+        Service, std::move(DepOutputOpts), ScanInstance, Consumer, Controller,
+        Inv, std::move(PrebuiltModulesASTMap), StableDirs);
     ScanInstance.addDependencyCollector(MDC);
     break;
   }
 
-  // Consider 
diff erent header search and diagnostic options to create
-  // 
diff erent modules. This avoids the unsound aliasing of module PCMs.
-  //
-  // TODO: Implement diagnostic bucketing to reduce the impact of strict
-  // context hashing.
-  ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true;
-  ScanInstance.getHeaderSearchOpts().ModulesSerializeOnlyPreprocessor = true;
-  ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
-  ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
-  ScanInstance.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true;
-  ScanInstance.getHeaderSearchOpts().ModulesForceValidateUserHeaders = false;
+  return MDC;
+}
+} // namespace clang::tooling::dependencies
 
-  // Avoid some checks and module map parsing when loading PCM files.
-  ScanInstance.getPreprocessorOpts().ModulesCheckRelocated = false;
+bool DependencyScanningAction::runInvocation(
+    std::unique_ptr<CompilerInvocation> Invocation,
+    IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+    std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+    DiagnosticConsumer *DiagConsumer) {
+  // Making sure that we canonicalize the defines before we create the deep
+  // copy to avoid unnecessary variants in the scanner and in the resulting
+  // explicit command lines.
+  if (any(Service.getOptimizeArgs() & ScanningOptimizations::Macros))
+    canonicalizeDefines(Invocation->getPreprocessorOpts());
+
+  // Make a deep copy of the original Clang invocation.
+  CompilerInvocation OriginalInvocation(*Invocation);
+
+  if (Scanned) {
+    // Scanning runs once for the first -cc1 invocation in a chain of driver
+    // jobs. For any dependent jobs, reuse the scanning result and just
+    // update the LastCC1Arguments to correspond to the new invocation.
+    // FIXME: to support multi-arch builds, each arch requires a separate scan
+    setLastCC1Arguments(std::move(OriginalInvocation));
+    return true;
+  }
+
+  Scanned = true;
+
+  // Create a compiler instance to handle the actual work.
+  auto ModCache = makeInProcessModuleCache(Service.getModuleCacheEntries());
+  ScanInstanceStorage.emplace(std::move(Invocation), std::move(PCHContainerOps),
+                              ModCache.get());
+  CompilerInstance &ScanInstance = *ScanInstanceStorage;
+
+  assert(!DiagConsumerFinished && "attempt to reuse finished consumer");
+  if (!initializeScanCompilerInstance(ScanInstance, FS, DiagConsumer, Service,
+                                      DepFS))
+    return false;
+
+  llvm::SmallVector<StringRef> StableDirs = getInitialStableDirs(ScanInstance);
+  auto MaybePrebuiltModulesASTMap =
+      computePrebuiltModulesASTMap(ScanInstance, StableDirs);
+  if (!MaybePrebuiltModulesASTMap)
+    return false;
+
+  auto DepOutputOpts = takeDependencyOutputOptionsFrom(ScanInstance);
+
+  MDC = initializeScanInstanceDependencyCollector(
+      ScanInstance, std::move(DepOutputOpts), WorkingDirectory, Consumer,
+      Service, OriginalInvocation, Controller, *MaybePrebuiltModulesASTMap,
+      StableDirs);
 
   std::unique_ptr<FrontendAction> Action;
 

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h
index 32fbcfffde53c..71c6731803597 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h
@@ -9,8 +9,10 @@
 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNER_H
 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNER_H
 
+#include "clang/Driver/Compilation.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Serialization/ObjectFilePCHContainerReader.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
@@ -30,12 +32,12 @@ class DependencyScanningAction {
   DependencyScanningAction(
       DependencyScanningService &Service, StringRef WorkingDirectory,
       DependencyConsumer &Consumer, DependencyActionController &Controller,
-      llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS,
+      IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS,
       std::optional<StringRef> ModuleName = std::nullopt)
       : Service(Service), WorkingDirectory(WorkingDirectory),
         Consumer(Consumer), Controller(Controller), DepFS(std::move(DepFS)),
         ModuleName(ModuleName) {}
-  bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
+  bool runInvocation(std::unique_ptr<CompilerInvocation> Invocation,
                      IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
                      std::shared_ptr<PCHContainerOperations> PCHContainerOps,
                      DiagnosticConsumer *DiagConsumer);
@@ -63,7 +65,7 @@ class DependencyScanningAction {
   StringRef WorkingDirectory;
   DependencyConsumer &Consumer;
   DependencyActionController &Controller;
-  llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
+  IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
   std::optional<StringRef> ModuleName;
   std::optional<CompilerInstance> ScanInstanceStorage;
   std::shared_ptr<ModuleDepCollector> MDC;
@@ -72,9 +74,81 @@ class DependencyScanningAction {
   bool DiagConsumerFinished = false;
 };
 
-// Helper functions
-void sanitizeDiagOpts(DiagnosticOptions &DiagOpts);
+// Helper functions and data types.
+std::unique_ptr<DiagnosticOptions>
+createDiagOptions(ArrayRef<std::string> CommandLine);
 
+struct DignosticsEngineWithDiagOpts {
+  // We need to bound the lifetime of the DiagOpts used to create the
+  // DiganosticsEngine with the DiagnosticsEngine itself.
+  std::unique_ptr<DiagnosticOptions> DiagOpts;
+  IntrusiveRefCntPtr<DiagnosticsEngine> DiagEngine;
+
+  DignosticsEngineWithDiagOpts(ArrayRef<std::string> CommandLine,
+                               IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+                               DiagnosticConsumer &DC);
+};
+
+struct TextDiagnosticsPrinterWithOutput {
+  // We need to bound the lifetime of the data that supports the DiagPrinter
+  // with it together so they have the same lifetime.
+  std::string DiagnosticOutput;
+  llvm::raw_string_ostream DiagnosticsOS;
+  std::unique_ptr<DiagnosticOptions> DiagOpts;
+  TextDiagnosticPrinter DiagPrinter;
+
+  TextDiagnosticsPrinterWithOutput(ArrayRef<std::string> CommandLine)
+      : DiagnosticsOS(DiagnosticOutput),
+        DiagOpts(createDiagOptions(CommandLine)),
+        DiagPrinter(DiagnosticsOS, *DiagOpts) {}
+};
+
+std::pair<std::unique_ptr<driver::Driver>, std::unique_ptr<driver::Compilation>>
+buildCompilation(ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags,
+                 IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
+
+std::unique_ptr<CompilerInvocation>
+createCompilerInvocation(ArrayRef<std::string> CommandLine,
+                         DiagnosticsEngine &Diags);
+
+std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>>
+initVFSForTUBuferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
+                          ArrayRef<std::string> CommandLine,
+                          StringRef WorkingDirectory,
+                          llvm::MemoryBufferRef TUBuffer);
+
+std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>>
+initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
+                         ArrayRef<std::string> CommandLine,
+                         StringRef WorkingDirectory, StringRef ModuleName);
+
+bool initializeScanCompilerInstance(
+    CompilerInstance &ScanInstance,
+    IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+    DiagnosticConsumer *DiagConsumer, DependencyScanningService &Service,
+    IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS);
+
+SmallVector<StringRef>
+getInitialStableDirs(const CompilerInstance &ScanInstance);
+
+std::optional<PrebuiltModulesAttrsMap>
+computePrebuiltModulesASTMap(CompilerInstance &ScanInstance,
+                             SmallVector<StringRef> &StableDirs);
+
+std::unique_ptr<DependencyOutputOptions>
+takeDependencyOutputOptionsFrom(CompilerInstance &ScanInstance);
+
+/// Create the dependency collector that will collect the produced
+/// dependencies. May return the created ModuleDepCollector depending
+/// on the scanning format.
+std::shared_ptr<ModuleDepCollector> initializeScanInstanceDependencyCollector(
+    CompilerInstance &ScanInstance,
+    std::unique_ptr<DependencyOutputOptions> DepOutputOpts,
+    StringRef WorkingDirectory, DependencyConsumer &Consumer,
+    DependencyScanningService &Service, CompilerInvocation &Inv,
+    DependencyActionController &Controller,
+    PrebuiltModulesAttrsMap PrebuiltModulesASTMap,
+    llvm::SmallVector<StringRef> &StableDirs);
 } // namespace dependencies
 } // namespace tooling
 } // namespace clang

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 796e587ba9147..95154212603ac 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -8,29 +8,9 @@
 
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "DependencyScannerImpl.h"
-#include "clang/Basic/DiagnosticDriver.h"
 #include "clang/Basic/DiagnosticFrontend.h"
-#include "clang/Basic/DiagnosticSerialization.h"
-#include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
-#include "clang/Driver/Job.h"
 #include "clang/Driver/Tool.h"
-#include "clang/Frontend/CompilerInstance.h"
-#include "clang/Frontend/CompilerInvocation.h"
-#include "clang/Frontend/FrontendActions.h"
-#include "clang/Frontend/TextDiagnosticPrinter.h"
-#include "clang/Frontend/Utils.h"
-#include "clang/Lex/PreprocessorOptions.h"
-#include "clang/Serialization/ObjectFilePCHContainerReader.h"
-#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
-#include "clang/Tooling/DependencyScanning/InProcessModuleCache.h"
-#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/TargetParser/Host.h"
-#include <optional>
 
 using namespace clang;
 using namespace tooling;
@@ -63,32 +43,19 @@ DependencyScanningWorker::DependencyScanningWorker(
   }
 }
 
-static std::unique_ptr<DiagnosticOptions>
-createDiagOptions(const std::vector<std::string> &CommandLine) {
-  std::vector<const char *> CLI;
-  for (const std::string &Arg : CommandLine)
-    CLI.push_back(Arg.c_str());
-  auto DiagOpts = CreateAndPopulateDiagOpts(CLI);
-  sanitizeDiagOpts(*DiagOpts);
-  return DiagOpts;
-}
-
 llvm::Error DependencyScanningWorker::computeDependencies(
     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
     DependencyConsumer &Consumer, DependencyActionController &Controller,
     std::optional<llvm::MemoryBufferRef> TUBuffer) {
   // Capture the emitted diagnostics and report them to the client
   // in the case of a failure.
-  std::string DiagnosticOutput;
-  llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
-  auto DiagOpts = createDiagOptions(CommandLine);
-  TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, *DiagOpts);
+  TextDiagnosticsPrinterWithOutput DiagPrinterWithOS(CommandLine);
 
   if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
-                          DiagPrinter, TUBuffer))
+                          DiagPrinterWithOS.DiagPrinter, TUBuffer))
     return llvm::Error::success();
-  return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
-                                             llvm::inconvertibleErrorCode());
+  return llvm::make_error<llvm::StringError>(
+      DiagPrinterWithOS.DiagnosticsOS.str(), llvm::inconvertibleErrorCode());
 }
 
 llvm::Error DependencyScanningWorker::computeDependencies(
@@ -97,51 +64,24 @@ llvm::Error DependencyScanningWorker::computeDependencies(
     StringRef ModuleName) {
   // Capture the emitted diagnostics and report them to the client
   // in the case of a failure.
-  std::string DiagnosticOutput;
-  llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
-  auto DiagOpts = createDiagOptions(CommandLine);
-  TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, *DiagOpts);
+  TextDiagnosticsPrinterWithOutput DiagPrinterWithOS(CommandLine);
 
   if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
-                          DiagPrinter, ModuleName))
+                          DiagPrinterWithOS.DiagPrinter, ModuleName))
     return llvm::Error::success();
-  return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
-                                             llvm::inconvertibleErrorCode());
+  return llvm::make_error<llvm::StringError>(
+      DiagPrinterWithOS.DiagnosticsOS.str(), llvm::inconvertibleErrorCode());
 }
 
 static bool forEachDriverJob(
     ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags,
     IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
     llvm::function_ref<bool(const driver::Command &Cmd)> Callback) {
-  SmallVector<const char *, 256> Argv;
-  Argv.reserve(ArgStrs.size());
-  for (const std::string &Arg : ArgStrs)
-    Argv.push_back(Arg.c_str());
-
-  std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>(
-      Argv[0], llvm::sys::getDefaultTargetTriple(), Diags,
-      "clang LLVM compiler", FS);
-  Driver->setTitle("clang_based_tool");
-
-  llvm::BumpPtrAllocator Alloc;
-  bool CLMode = driver::IsClangCL(
-      driver::getDriverMode(Argv[0], ArrayRef(Argv).slice(1)));
-
-  if (llvm::Error E =
-          driver::expandResponseFiles(Argv, CLMode, Alloc, FS.get())) {
-    Diags.Report(diag::err_drv_expand_response_file)
-        << llvm::toString(std::move(E));
-    return false;
-  }
-
-  const std::unique_ptr<driver::Compilation> Compilation(
-      Driver->BuildCompilation(llvm::ArrayRef(Argv)));
+  // Compilation holds a non-owning a reference to the Driver, hence we need to
+  // keep the Driver alive when we use Compilation.
+  auto [Driver, Compilation] = buildCompilation(ArgStrs, Diags, FS);
   if (!Compilation)
     return false;
-
-  if (Compilation->containsError())
-    return false;
-
   for (const driver::Command &Job : Compilation->getJobs()) {
     if (!Callback(Job))
       return false;
@@ -150,30 +90,21 @@ static bool forEachDriverJob(
 }
 
 static bool createAndRunToolInvocation(
-    std::vector<std::string> CommandLine, DependencyScanningAction &Action,
+    const std::vector<std::string> &CommandLine,
+    DependencyScanningAction &Action,
     IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
     std::shared_ptr<clang::PCHContainerOperations> &PCHContainerOps,
     DiagnosticsEngine &Diags, DependencyConsumer &Consumer) {
-
-  // Save executable path before providing CommandLine to ToolInvocation
-  std::string Executable = CommandLine[0];
-
-  llvm::opt::ArgStringList Argv;
-  for (const std::string &Str : ArrayRef(CommandLine).drop_front())
-    Argv.push_back(Str.c_str());
-
-  auto Invocation = std::make_shared<CompilerInvocation>();
-  if (!CompilerInvocation::CreateFromArgs(*Invocation, Argv, Diags)) {
-    // FIXME: Should we just go on like cc1_main does?
+  auto Invocation = createCompilerInvocation(CommandLine, Diags);
+  if (!Invocation)
     return false;
-  }
 
   if (!Action.runInvocation(std::move(Invocation), std::move(FS),
                             PCHContainerOps, Diags.getClient()))
     return false;
 
   std::vector<std::string> Args = Action.takeLastCC1Arguments();
-  Consumer.handleBuildCommand({std::move(Executable), std::move(Args)});
+  Consumer.handleBuildCommand({CommandLine[0], std::move(Args)});
   return true;
 }
 
@@ -182,24 +113,19 @@ bool DependencyScanningWorker::scanDependencies(
     DependencyConsumer &Consumer, DependencyActionController &Controller,
     DiagnosticConsumer &DC, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
     std::optional<StringRef> ModuleName) {
-  std::vector<const char *> CCommandLine(CommandLine.size(), nullptr);
-  llvm::transform(CommandLine, CCommandLine.begin(),
-                  [](const std::string &Str) { return Str.c_str(); });
-  auto DiagOpts = CreateAndPopulateDiagOpts(CCommandLine);
-  sanitizeDiagOpts(*DiagOpts);
-  auto Diags = CompilerInstance::createDiagnostics(*FS, *DiagOpts, &DC,
-                                                   /*ShouldOwnClient=*/false);
-
+  DignosticsEngineWithDiagOpts DiagEngineWithCmdAndOpts(CommandLine, FS, DC);
   DependencyScanningAction Action(Service, WorkingDirectory, Consumer,
                                   Controller, DepFS, ModuleName);
 
   bool Success = false;
   if (CommandLine[1] == "-cc1") {
-    Success = createAndRunToolInvocation(CommandLine, Action, FS,
-                                         PCHContainerOps, *Diags, Consumer);
+    Success = createAndRunToolInvocation(
+        CommandLine, Action, FS, PCHContainerOps,
+        *DiagEngineWithCmdAndOpts.DiagEngine, Consumer);
   } else {
     Success = forEachDriverJob(
-        CommandLine, *Diags, FS, [&](const driver::Command &Cmd) {
+        CommandLine, *DiagEngineWithCmdAndOpts.DiagEngine, FS,
+        [&](const driver::Command &Cmd) {
           if (StringRef(Cmd.getCreator().getName()) != "clang") {
             // Non-clang command. Just pass through to the dependency
             // consumer.
@@ -218,13 +144,15 @@ bool DependencyScanningWorker::scanDependencies(
           // system to ensure that any file system requests that
           // are made by the driver do not go through the
           // dependency scanning filesystem.
-          return createAndRunToolInvocation(std::move(Argv), Action, FS,
-                                            PCHContainerOps, *Diags, Consumer);
+          return createAndRunToolInvocation(
+              std::move(Argv), Action, FS, PCHContainerOps,
+              *DiagEngineWithCmdAndOpts.DiagEngine, Consumer);
         });
   }
 
   if (Success && !Action.hasScanned())
-    Diags->Report(diag::err_fe_expected_compiler_job)
+    DiagEngineWithCmdAndOpts.DiagEngine->Report(
+        diag::err_fe_expected_compiler_job)
         << llvm::join(CommandLine, " ");
 
   // Ensure finish() is called even if we never reached ExecuteAction().
@@ -238,66 +166,25 @@ bool DependencyScanningWorker::computeDependencies(
     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
     DependencyConsumer &Consumer, DependencyActionController &Controller,
     DiagnosticConsumer &DC, std::optional<llvm::MemoryBufferRef> TUBuffer) {
-  // Reset what might have been modified in the previous worker invocation.
-  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
-
-  std::optional<std::vector<std::string>> ModifiedCommandLine;
-  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS;
-
-  // If we're scanning based on a module name alone, we don't expect the client
-  // to provide us with an input file. However, the driver really wants to have
-  // one. Let's just make it up to make the driver happy.
   if (TUBuffer) {
-    auto OverlayFS =
-        llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
-    auto InMemoryFS =
-        llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
-    InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
-    auto InputPath = TUBuffer->getBufferIdentifier();
-    InMemoryFS->addFile(
-        InputPath, 0,
-        llvm::MemoryBuffer::getMemBufferCopy(TUBuffer->getBuffer()));
-    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay =
-        InMemoryFS;
-
-    OverlayFS->pushOverlay(InMemoryOverlay);
-    ModifiedFS = OverlayFS;
-    ModifiedCommandLine = CommandLine;
-    ModifiedCommandLine->emplace_back(InputPath);
+    auto [FinalFS, FinalCommandLine] = initVFSForTUBuferScanning(
+        BaseFS, CommandLine, WorkingDirectory, *TUBuffer);
+    return scanDependencies(WorkingDirectory, FinalCommandLine, Consumer,
+                            Controller, DC, FinalFS,
+                            /*ModuleName=*/std::nullopt);
+  } else {
+    BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+    return scanDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
+                            DC, BaseFS, /*ModuleName=*/std::nullopt);
   }
-
-  const std::vector<std::string> &FinalCommandLine =
-      ModifiedCommandLine ? *ModifiedCommandLine : CommandLine;
-  auto &FinalFS = ModifiedFS ? ModifiedFS : BaseFS;
-
-  return scanDependencies(WorkingDirectory, FinalCommandLine, Consumer,
-                          Controller, DC, FinalFS, /*ModuleName=*/std::nullopt);
 }
 
 bool DependencyScanningWorker::computeDependencies(
     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
     DependencyConsumer &Consumer, DependencyActionController &Controller,
     DiagnosticConsumer &DC, StringRef ModuleName) {
-  // Reset what might have been modified in the previous worker invocation.
-  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
-
-  // If we're scanning based on a module name alone, we don't expect the client
-  // to provide us with an input file. However, the driver really wants to have
-  // one. Let's just make it up to make the driver happy.
-  auto OverlayFS =
-      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
-  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
-  InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
-  SmallString<128> FakeInputPath;
-  // TODO: We should retry the creation if the path already exists.
-  llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath,
-                                  /*MakeAbsolute=*/false);
-  InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
-
-  OverlayFS->pushOverlay(InMemoryOverlay);
-  auto ModifiedCommandLine = CommandLine;
-  ModifiedCommandLine.emplace_back(FakeInputPath);
+  auto [OverlayFS, ModifiedCommandLine] = initVFSForByNameScanning(
+      BaseFS, CommandLine, WorkingDirectory, ModuleName);
 
   return scanDependencies(WorkingDirectory, ModifiedCommandLine, Consumer,
                           Controller, DC, OverlayFS, ModuleName);


        


More information about the cfe-commits mailing list