[clang] [clang][Dependency Scanning] Implementing `CompilerInstanceWithContext` to Speedup By Name Scans (PR #160207)

Qiongsi Wu via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 23 11:04:03 PDT 2025


https://github.com/qiongsiwu updated https://github.com/llvm/llvm-project/pull/160207

>From b4d2b6ddd19c04d7347a24a840b8a36c76a4eec0 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Mon, 22 Sep 2025 09:36:21 -0700
Subject: [PATCH 1/5] Initial commit, adding CompilerInstanceWithContext header
 and implementation.

---
 .../include/clang/Frontend/CompilerInstance.h |   6 +
 clang/include/clang/Frontend/Utils.h          |   4 +
 clang/include/clang/Lex/Preprocessor.h        |   1 +
 .../CompilerInstanceWithContext.h             |  90 +++++++
 .../DependencyScanningWorker.h                |  46 ++++
 .../DependencyScanning/ModuleDepCollector.h   |   9 +-
 .../Tooling/DependencyScanning/CMakeLists.txt |   1 +
 .../CompilerInstanceWithContext.cpp           | 249 +++++++++++++++++
 .../DependencyScanningWorker.cpp              | 254 ++++++++----------
 .../DependencyScanning/ModuleDepCollector.cpp |   9 +-
 10 files changed, 523 insertions(+), 146 deletions(-)
 create mode 100644 clang/include/clang/Tooling/DependencyScanning/CompilerInstanceWithContext.h
 create mode 100644 clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp

diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index a6b6993b708d0..2fdfbe01fbe78 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -948,6 +948,12 @@ class CompilerInstance : public ModuleLoader {
     DependencyCollectors.push_back(std::move(Listener));
   }
 
+  void clearDependencyCollectors() { DependencyCollectors.clear(); }
+
+  std::vector<std::shared_ptr<DependencyCollector>> &getDependencyCollectors() {
+    return DependencyCollectors;
+  }
+
   void setExternalSemaSource(IntrusiveRefCntPtr<ExternalSemaSource> ESS);
 
   ModuleCache &getModuleCache() const { return *ModCache; }
diff --git a/clang/include/clang/Frontend/Utils.h b/clang/include/clang/Frontend/Utils.h
index f86c2f5074de0..1b52d970ff1a3 100644
--- a/clang/include/clang/Frontend/Utils.h
+++ b/clang/include/clang/Frontend/Utils.h
@@ -40,6 +40,7 @@ class DiagnosticsEngine;
 class ExternalSemaSource;
 class FrontendOptions;
 class PCHContainerReader;
+class PPCallbacks;
 class Preprocessor;
 class PreprocessorOptions;
 class PreprocessorOutputOptions;
@@ -87,6 +88,9 @@ class DependencyCollector {
                                   bool IsSystem, bool IsModuleFile,
                                   bool IsMissing);
 
+  /// @return the PPCallbacks this collector attached, or nullptr if none.
+  virtual PPCallbacks *getPPCallback() { return nullptr; }
+
 protected:
   /// Return true if the filename was added to the list of dependencies, false
   /// otherwise.
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 39754847a93e4..953902b13783f 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1327,6 +1327,7 @@ class Preprocessor {
                                                 std::move(Callbacks));
     Callbacks = std::move(C);
   }
+  void removePPCallbacks() { Callbacks.reset(); }
   /// \}
 
   /// Get the number of tokens processed so far.
diff --git a/clang/include/clang/Tooling/DependencyScanning/CompilerInstanceWithContext.h b/clang/include/clang/Tooling/DependencyScanning/CompilerInstanceWithContext.h
new file mode 100644
index 0000000000000..5a2cb25d9d972
--- /dev/null
+++ b/clang/include/clang/Tooling/DependencyScanning/CompilerInstanceWithContext.h
@@ -0,0 +1,90 @@
+//===- CompilerInstanceWithContext.h - clang scanning compiler instance ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_COMPILERINSTANCEWITHCONTEXT_H
+#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_COMPILERINSTANCEWITHCONTEXT_H
+
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Driver/Compilation.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Serialization/ModuleCache.h"
+#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace tooling {
+namespace dependencies {
+
+// Forward declarations.
+class DependencyScanningWorker;
+class DependencyConsumer;
+class DependencyActionController;
+
+/// Scanning CompilerInstance and its context, reused across by-name scans.
+class CompilerInstanceWithContext {
+  // Context - CWD and CommandLine are owned copies of the caller's data.
+  DependencyScanningWorker &Worker;
+  std::string CWD;
+  std::vector<std::string> CommandLine;
+  static constexpr uint64_t MAX_NUM_NAMES = (1 << 12);
+  static const std::string FakeFileBuffer;
+
+  // Context - file systems
+  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFS;
+  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFS;
+  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay;
+
+  // Context - diagnostics engine, file manager and source manager.
+  std::string DiagnosticOutput;
+  llvm::raw_string_ostream DiagnosticsOS;
+  std::unique_ptr<TextDiagnosticPrinter> DiagPrinter;
+  IntrusiveRefCntPtr<DiagnosticsEngine> Diags;
+  std::unique_ptr<FileManager> FileMgr;
+  std::unique_ptr<SourceManager> SrcMgr;
+
+  // Context - compiler invocation
+  std::unique_ptr<clang::driver::Driver> Driver;
+  std::unique_ptr<clang::driver::Compilation> Compilation;
+  std::unique_ptr<CompilerInvocation> Invocation;
+  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFSFromCompilerInvocation;
+
+  // Context - output options
+  std::unique_ptr<DependencyOutputOptions> OutputOpts;
+
+  // Context - stable directory handling
+  llvm::SmallVector<StringRef> StableDirs;
+  PrebuiltModulesAttrsMap PrebuiltModuleVFSMap;
+
+  // Compiler Instance
+  IntrusiveRefCntPtr<ModuleCache> ModCache;
+  std::unique_ptr<CompilerInstance> CIPtr;
+
+  // Offset into the fake input buffer; each by-name scan uses the next one.
+  int32_t SrcLocOffset = 0;
+
+public:
+  CompilerInstanceWithContext(DependencyScanningWorker &Worker, StringRef CWD,
+                              const std::vector<std::string> &CMD)
+      : Worker(Worker), CWD(CWD.str()), CommandLine(CMD),
+        DiagnosticsOS(DiagnosticOutput) {}
+  llvm::Error initialize();
+  llvm::Error computeDependencies(StringRef ModuleName,
+                                  DependencyConsumer &Consumer,
+                                  DependencyActionController &Controller);
+  llvm::Error finalize();
+};
+} // namespace dependencies
+} // namespace tooling
+} // namespace clang
+
+#endif
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
index 6060e4b43312e..fa38197e39097 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -13,6 +13,7 @@
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Frontend/PCHContainerOperations.h"
+#include "clang/Tooling/DependencyScanning/CompilerInstanceWithContext.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
 #include "llvm/Support/Error.h"
@@ -74,6 +75,22 @@ class DependencyActionController {
                                          ModuleOutputKind Kind) = 0;
 };
 
+/// Some helper functions for the dependency scanning worker.
+std::string
+deduceDepTarget(const std::string &OutputFile,
+                const SmallVectorImpl<FrontendInputFile> &InputFiles);
+void canonicalizeDefines(PreprocessorOptions &PPOpts);
+void sanitizeDiagOpts(DiagnosticOptions &DiagOpts);
+std::unique_ptr<DiagnosticOptions>
+createDiagOptions(const std::vector<std::string> &CommandLine);
+
+using PrebuiltModuleFilesT = decltype(HeaderSearchOptions::PrebuiltModuleFiles);
+bool visitPrebuiltModule(StringRef PrebuiltModuleFilename, CompilerInstance &CI,
+                         PrebuiltModuleFilesT &ModuleFiles,
+                         PrebuiltModulesAttrsMap &PrebuiltModulesASTMap,
+                         DiagnosticsEngine &Diags,
+                         const ArrayRef<StringRef> StableDirs);
+
 /// An individual dependency scanning worker that is able to run on its own
 /// thread.
 ///
@@ -151,6 +168,9 @@ class DependencyScanningWorker {
   /// (passed in the constructor).
   llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
 
+  friend class CompilerInstanceWithContext;
+  std::unique_ptr<CompilerInstanceWithContext> CIWithContext;
+
   /// Private helper functions.
   bool scanDependencies(StringRef WorkingDirectory,
                         const std::vector<std::string> &CommandLine,
@@ -161,6 +181,32 @@ class DependencyScanningWorker {
                         std::optional<StringRef> ModuleName);
 };
 
+/// Fetches dependency directives from the scanning VFS under a FileManager.
+class ScanningDependencyDirectivesGetter : public DependencyDirectivesGetter {
+  DependencyScanningWorkerFilesystem *DepFS = nullptr;
+
+public:
+  explicit ScanningDependencyDirectivesGetter(FileManager &FileMgr) {
+    FileMgr.getVirtualFileSystem().visit([&](llvm::vfs::FileSystem &FS) {
+      if (auto *DFS = llvm::dyn_cast<DependencyScanningWorkerFilesystem>(&FS)) {
+        assert(!DepFS && "Found multiple scanning VFSs");
+        DepFS = DFS;
+      }
+    });
+    assert(DepFS && "Did not find scanning VFS");
+  }
+
+  std::unique_ptr<DependencyDirectivesGetter>
+  cloneFor(FileManager &FileMgr) override {
+    return std::make_unique<ScanningDependencyDirectivesGetter>(FileMgr);
+  }
+
+  std::optional<ArrayRef<dependency_directives_scan::Directive>>
+  operator()(FileEntryRef File) override {
+    return DepFS->getDirectiveTokens(File.getName());
+  }
+};
+
 } // end namespace dependencies
 } // end namespace tooling
 } // end namespace clang
diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
index 4136cb73f7043..c79dbffa5c263 100644
--- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
+++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
@@ -282,11 +282,12 @@ class ModuleDepCollector final : public DependencyCollector {
                      CompilerInstance &ScanInstance, DependencyConsumer &C,
                      DependencyActionController &Controller,
                      CompilerInvocation OriginalCI,
-                     const PrebuiltModulesAttrsMap PrebuiltModulesASTMap,
+                     const PrebuiltModulesAttrsMap &PrebuiltModulesASTMap,
                      const ArrayRef<StringRef> StableDirs);
 
   void attachToPreprocessor(Preprocessor &PP) override;
   void attachToASTReader(ASTReader &R) override;
+  PPCallbacks *getPPCallback() override { return CollectorPPPtr; }
 
   /// Apply any changes implied by the discovered dependencies to the given
   /// invocation, (e.g. disable implicit modules, add explicit module paths).
@@ -305,7 +306,7 @@ class ModuleDepCollector final : public DependencyCollector {
   DependencyActionController &Controller;
   /// Mapping from prebuilt AST filepaths to their attributes referenced during
   /// dependency collecting.
-  const PrebuiltModulesAttrsMap PrebuiltModulesASTMap;
+  const PrebuiltModulesAttrsMap &PrebuiltModulesASTMap;
   /// Directory paths known to be stable through an active development and build
   /// cycle.
   const ArrayRef<StringRef> StableDirs;
@@ -339,6 +340,10 @@ class ModuleDepCollector final : public DependencyCollector {
   std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
   std::vector<P1689ModuleInfo> RequiredStdCXXModules;
 
+  /// A pointer to the preprocessor callback so we can invoke it directly
+  /// if needed.
+  ModuleDepCollectorPP *CollectorPPPtr = nullptr;
+
   /// Checks whether the module is known as being prebuilt.
   bool isPrebuiltModule(const Module *M);
 
diff --git a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt
index 42a63faa26d3e..9cb73109902e2 100644
--- a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt
+++ b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt
@@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 add_clang_library(clangDependencyScanning
+  CompilerInstanceWithContext.cpp
   DependencyScanningFilesystem.cpp
   DependencyScanningService.cpp
   DependencyScanningWorker.cpp
diff --git a/clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp b/clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp
new file mode 100644
index 0000000000000..7406509ce7bba
--- /dev/null
+++ b/clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp
@@ -0,0 +1,249 @@
+//===- CompilerInstanceWithContext.cpp - clang scanning compiler instance -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/DependencyScanning/CompilerInstanceWithContext.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
+#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
+#include "llvm/TargetParser/Host.h"
+
+using namespace clang;
+using namespace tooling;
+using namespace dependencies;
+
+const std::string CompilerInstanceWithContext::FakeFileBuffer =
+    std::string(MAX_NUM_NAMES, ' ');
+
+/// Set up the VFS, diagnostics, invocation and CompilerInstance for scanning.
+llvm::Error CompilerInstanceWithContext::initialize() {
+  // Virtual file system setup
+  // - Set the current working directory.
+  Worker.BaseFS->setCurrentWorkingDirectory(CWD);
+  OverlayFS =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(Worker.BaseFS);
+  InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS->setCurrentWorkingDirectory(CWD);
+
+  // - Create the fake file as scanning input source file and setup overlay
+  //   FS.
+  SmallString<128> FakeInputPath;
+  llvm::sys::fs::createUniquePath("ScanningCI-%%%%%%%%.input", FakeInputPath,
+                                  /*MakeAbsolute=*/false);
+  InMemoryFS->addFile(FakeInputPath, 0,
+                      llvm::MemoryBuffer::getMemBuffer(FakeFileBuffer));
+  InMemoryOverlay = InMemoryFS;
+  // TODO: we need to handle CAS/CASFS here.
+  //    if (Worker.CAS && !Worker.DepCASFS)
+  //     InMemoryOverlay = llvm::cas::createCASProvidingFileSystem(
+  //         Worker.CAS, std::move(InMemoryFS));
+  OverlayFS->pushOverlay(InMemoryOverlay);
+
+  // Augment the command line with the fake input file.
+  CommandLine.emplace_back(FakeInputPath);
+
+  // Create the file manager, the diagnostics engine, and the source manager.
+  // NOTE(review): the release()d DiagnosticOptions below are never freed.
+  FileMgr = std::make_unique<FileManager>(FileSystemOptions{}, OverlayFS);
+  DiagnosticOutput.clear();
+  auto DiagOpts = createDiagOptions(CommandLine);
+  DiagPrinter = std::make_unique<TextDiagnosticPrinter>(DiagnosticsOS,
+                                                        *(DiagOpts.release()));
+  std::vector<const char *> CCommandLine(CommandLine.size(), nullptr);
+  llvm::transform(CommandLine, CCommandLine.begin(),
+                  [](const std::string &Str) { return Str.c_str(); });
+  DiagOpts = CreateAndPopulateDiagOpts(CCommandLine);
+  sanitizeDiagOpts(*DiagOpts);
+  Diags = CompilerInstance::createDiagnostics(*OverlayFS, *(DiagOpts.release()),
+                                              DiagPrinter.get(),
+                                              /*ShouldOwnClient=*/false);
+  SrcMgr = std::make_unique<SourceManager>(*Diags, *FileMgr);
+  Diags->setSourceManager(SrcMgr.get());
+
+  // Create the compiler invocation.
+  Driver = std::make_unique<driver::Driver>(
+      CCommandLine[0], llvm::sys::getDefaultTargetTriple(), *Diags,
+      "clang LLVM compiler", OverlayFS);
+  Driver->setTitle("clang_based_tool");
+  Compilation.reset(Driver->BuildCompilation(llvm::ArrayRef(CCommandLine)));
+
+  if (!Compilation || Compilation->containsError() ||
+      Compilation->getJobs().empty())
+    return llvm::make_error<llvm::StringError>("Failed to build compilation",
+                                               llvm::inconvertibleErrorCode());
+
+  const driver::Command &Command = *(Compilation->getJobs().begin());
+  const auto &CommandArgs = Command.getArguments();
+  if (CommandArgs.empty() || StringRef(CommandArgs[0]) != "-cc1")
+    return llvm::make_error<llvm::StringError>(
+        "Incorrect compilation command, missing cc1",
+        llvm::inconvertibleErrorCode());
+  Invocation = std::make_unique<CompilerInvocation>();
+  CompilerInvocation::CreateFromArgs(*Invocation, Command.getArguments(),
+                                     *Diags, Command.getExecutable());
+  Invocation->getFrontendOpts().DisableFree = false;
+  Invocation->getCodeGenOpts().DisableFree = false;
+
+  if (any(Worker.Service.getOptimizeArgs() & ScanningOptimizations::Macros))
+    canonicalizeDefines(Invocation->getPreprocessorOpts());
+
+  // Create the CompilerInstance.
+  ModCache = makeInProcessModuleCache(Worker.Service.getModuleCacheEntries());
+  CIPtr = std::make_unique<CompilerInstance>(
+      std::make_shared<CompilerInvocation>(*Invocation), Worker.PCHContainerOps,
+      ModCache.get());
+  auto &CI = *CIPtr;
+
+  // TODO: un-comment the line below when we enable CAS.
+  // CI.getInvocation().getCASOpts() = Worker.CASOpts;
+  CI.setBuildingModule(false);
+  sanitizeDiagOpts(CI.getDiagnosticOpts());
+  CI.createDiagnostics(DiagPrinter.get(), false);
+  CI.getPreprocessorOpts().AllowPCHWithDifferentModulesCachePath = true;
+  CI.getFrontendOpts().GenerateGlobalModuleIndex = false;
+  CI.getFrontendOpts().UseGlobalModuleIndex = false;
+  // CI.getFrontendOpts().ModulesShareFileManager = Worker.DepCASFS ? false :
+  // true;
+  CI.getHeaderSearchOpts().ModuleFormat = "raw";
+  CI.getHeaderSearchOpts().ModulesIncludeVFSUsage =
+      any(Worker.Service.getOptimizeArgs() & ScanningOptimizations::VFS);
+  CI.getHeaderSearchOpts().ModulesStrictContextHash = true;
+  CI.getHeaderSearchOpts().ModulesSerializeOnlyPreprocessor = true;
+  CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
+  CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
+  CI.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true;
+  CI.getPreprocessorOpts().ModulesCheckRelocated = false;
+
+  if (CI.getHeaderSearchOpts().ModulesValidateOncePerBuildSession)
+    CI.getHeaderSearchOpts().BuildSessionTimestamp =
+        Worker.Service.getBuildSessionTimestamp();
+
+  CI.setDiagnostics(Diags.get());
+
+  // NOTE(review): CI gets its own FileManager here, distinct from the FileMgr
+  // member built on OverlayFS above - confirm CI can see the fake input file.
+  auto *FM = CI.createFileManager();
+
+  if (Worker.DepFS) {
+    Worker.DepFS->resetBypassedPathPrefix();
+    if (!CI.getHeaderSearchOpts().ModuleCachePath.empty()) {
+      SmallString<256> ModulesCachePath;
+      normalizeModuleCachePath(
+          *FM, CI.getHeaderSearchOpts().ModuleCachePath, ModulesCachePath);
+      Worker.DepFS->setBypassedPathPrefix(ModulesCachePath);
+    }
+
+    CI.setDependencyDirectivesGetter(
+        std::make_unique<ScanningDependencyDirectivesGetter>(*FM));
+  }
+
+  CI.createSourceManager(*FM);
+
+  const StringRef Sysroot = CI.getHeaderSearchOpts().Sysroot;
+  if (!Sysroot.empty() && (llvm::sys::path::root_directory(Sysroot) != Sysroot))
+    StableDirs = {Sysroot, CI.getHeaderSearchOpts().ResourceDir};
+  if (!CI.getPreprocessorOpts().ImplicitPCHInclude.empty())
+    if (visitPrebuiltModule(CI.getPreprocessorOpts().ImplicitPCHInclude, CI,
+                            CI.getHeaderSearchOpts().PrebuiltModuleFiles,
+                            PrebuiltModuleVFSMap, CI.getDiagnostics(),
+                            StableDirs))
+      return llvm::make_error<llvm::StringError>(
+          "Prebuilt module scanning failed", llvm::inconvertibleErrorCode());
+
+  OutputOpts = std::make_unique<DependencyOutputOptions>();
+  std::swap(*OutputOpts, CI.getInvocation().getDependencyOutputOpts());
+  // We need at least one -MT equivalent for the generator of make dependency
+  // files to work.
+  if (OutputOpts->Targets.empty())
+    OutputOpts->Targets = {deduceDepTarget(CI.getFrontendOpts().OutputFile,
+                                           CI.getFrontendOpts().Inputs)};
+  OutputOpts->IncludeSystemHeaders = true;
+
+  CI.createTarget();
+  // CI.initializeDelayedInputFileFromCAS();
+
+  return llvm::Error::success();
+}
+
+/// Scan the dependencies of \p ModuleName as if the fake input imported it.
+llvm::Error CompilerInstanceWithContext::computeDependencies(
+    StringRef ModuleName, DependencyConsumer &Consumer,
+    DependencyActionController &Controller) {
+  // Each scan imports at its own offset, so stay inside the fake buffer.
+  if (static_cast<uint64_t>(SrcLocOffset) >= MAX_NUM_NAMES)
+    return llvm::make_error<llvm::StringError>(
+        "Exceeded the maximum number of by-name scans",
+        llvm::inconvertibleErrorCode());
+
+  auto &CI = *CIPtr;
+  CompilerInvocation Inv(*Invocation);
+
+  auto Opts = std::make_unique<DependencyOutputOptions>(*OutputOpts);
+  auto MDC = std::make_shared<ModuleDepCollector>(
+      Worker.Service, std::move(Opts), CI, Consumer, Controller, Inv,
+      PrebuiltModuleVFSMap, StableDirs);
+
+  CI.clearDependencyCollectors();
+  CI.addDependencyCollector(MDC);
+
+  // Only the first scan starts the source file; later ones reuse it.
+  auto Action = std::make_unique<GetDependenciesByModuleNameAction>(ModuleName);
+  if (!SrcLocOffset)
+    Action->BeginSourceFile(CI, CI.getFrontendOpts().Inputs.front());
+  else
+    CI.getPreprocessor().removePPCallbacks();
+
+  Preprocessor &PP = CI.getPreprocessor();
+  SourceManager &SM = PP.getSourceManager();
+  FileID MainFileID = SM.getMainFileID();
+  SourceLocation FileStart = SM.getLocForStartOfFile(MainFileID);
+  SourceLocation IDLocation = FileStart.getLocWithOffset(SrcLocOffset);
+  if (!SrcLocOffset) {
+    PP.EnterSourceFile(MainFileID, nullptr, SourceLocation());
+  } else {
+    for (auto &DC : CI.getDependencyCollectors()) {
+      DC->attachToPreprocessor(PP);
+      auto *CB = DC->getPPCallback();
+      assert(CB && "DC must have dependency collector callback");
+      CB->LexedFileChanged(MainFileID,
+                           PPChainedCallbacks::LexedFileChangeReason::EnterFile,
+                           SM.getFileCharacteristic(IDLocation), FileID(),
+                           IDLocation);
+    }
+  }
+
+  SrcLocOffset++;
+  SmallVector<IdentifierLoc, 2> Path;
+  IdentifierInfo *ModuleID = PP.getIdentifierInfo(ModuleName);
+  Path.emplace_back(IDLocation, ModuleID);
+  auto ModResult = CI.loadModule(IDLocation, Path, Module::Hidden, false);
+
+  for (auto &DC : CI.getDependencyCollectors()) {
+    auto *CB = DC->getPPCallback();
+    assert(CB && "DC must have dependency collector callback");
+    CB->moduleImport(SourceLocation(), Path, ModResult);
+    CB->EndOfMainFile();
+  }
+
+  MDC->applyDiscoveredDependencies(Inv);
+
+  // TODO: enable CAS
+  //   std::string ID = Inv.getFileSystemOpts().CASFileSystemRootID;
+  //   if (!ID.empty())
+  //     Consumer.handleCASFileSystemRootID(std::move(ID));
+  //   ID = Inv.getFrontendOpts().CASIncludeTreeID;
+  //   if (!ID.empty())
+  //     Consumer.handleIncludeTreeID(std::move(ID));
+
+  return llvm::Error::success();
+}
+
+llvm::Error CompilerInstanceWithContext::finalize() {
+  DiagPrinter->finish();
+  return llvm::Error::success();
+}
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 8375732e4aa33..a36635cba2b96 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -86,8 +86,6 @@ static bool checkHeaderSearchPaths(const HeaderSearchOptions &HSOpts,
   return false;
 }
 
-using PrebuiltModuleFilesT = decltype(HeaderSearchOptions::PrebuiltModuleFiles);
-
 /// A listener that collects the imported modules and the input
 /// files. While visiting, collect vfsoverlays and file inputs that determine
 /// whether prebuilt modules fully resolve in stable directories.
@@ -201,42 +199,6 @@ class PrebuiltModuleListener : public ASTReaderListener {
   const ArrayRef<StringRef> StableDirs;
 };
 
-/// Visit the given prebuilt module and collect all of the modules it
-/// transitively imports and contributing input files.
-static bool visitPrebuiltModule(StringRef PrebuiltModuleFilename,
-                                CompilerInstance &CI,
-                                PrebuiltModuleFilesT &ModuleFiles,
-                                PrebuiltModulesAttrsMap &PrebuiltModulesASTMap,
-                                DiagnosticsEngine &Diags,
-                                const ArrayRef<StringRef> StableDirs) {
-  // List of module files to be processed.
-  llvm::SmallVector<std::string> Worklist;
-
-  PrebuiltModuleListener Listener(ModuleFiles, Worklist, PrebuiltModulesASTMap,
-                                  CI.getHeaderSearchOpts(), CI.getLangOpts(),
-                                  Diags, StableDirs);
-
-  Listener.visitModuleFile(PrebuiltModuleFilename,
-                           serialization::MK_ExplicitModule);
-  if (ASTReader::readASTFileControlBlock(
-          PrebuiltModuleFilename, CI.getFileManager(), CI.getModuleCache(),
-          CI.getPCHContainerReader(),
-          /*FindModuleFileExtensions=*/false, Listener,
-          /*ValidateDiagnosticOptions=*/false, ASTReader::ARR_OutOfDate))
-    return true;
-
-  while (!Worklist.empty()) {
-    Listener.visitModuleFile(Worklist.back(), serialization::MK_ExplicitModule);
-    if (ASTReader::readASTFileControlBlock(
-            Worklist.pop_back_val(), CI.getFileManager(), CI.getModuleCache(),
-            CI.getPCHContainerReader(),
-            /*FindModuleFileExtensions=*/false, Listener,
-            /*ValidateDiagnosticOptions=*/false))
-      return true;
-  }
-  return false;
-}
-
 /// Transform arbitrary file name into an object-like file name.
 static std::string makeObjFileName(StringRef FileName) {
   SmallString<128> ObjFileName(FileName);
@@ -244,40 +206,6 @@ static std::string makeObjFileName(StringRef FileName) {
   return std::string(ObjFileName);
 }
 
-/// Deduce the dependency target based on the output file and input files.
-static std::string
-deduceDepTarget(const std::string &OutputFile,
-                const SmallVectorImpl<FrontendInputFile> &InputFiles) {
-  if (OutputFile != "-")
-    return OutputFile;
-
-  if (InputFiles.empty() || !InputFiles.front().isFile())
-    return "clang-scan-deps\\ dependency";
-
-  return makeObjFileName(InputFiles.front().getFile());
-}
-
-/// Sanitize diagnostic options for dependency scan.
-static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
-  // Don't print 'X warnings and Y errors generated'.
-  DiagOpts.ShowCarets = false;
-  // Don't write out diagnostic file.
-  DiagOpts.DiagnosticSerializationFile.clear();
-  // Don't emit warnings except for scanning specific warnings.
-  // TODO: It would be useful to add a more principled way to ignore all
-  //       warnings that come from source code. The issue is that we need to
-  //       ignore warnings that could be surpressed by
-  //       `#pragma clang diagnostic`, while still allowing some scanning
-  //       warnings for things we're not ready to turn into errors yet.
-  //       See `test/ClangScanDeps/diagnostic-pragmas.c` for an example.
-  llvm::erase_if(DiagOpts.Warnings, [](StringRef Warning) {
-    return llvm::StringSwitch<bool>(Warning)
-        .Cases("pch-vfs-diff", "error=pch-vfs-diff", false)
-        .StartsWith("no-error=", false)
-        .Default(true);
-  });
-}
-
 // Clang implements -D and -U by splatting text into a predefines buffer. This
 // allows constructs such as `-DFඞ=3 "-D F\u{0D9E} 4 3 2”` to be accepted and
 // define the same macro, or adding C++ style comments before the macro name.
@@ -316,64 +244,6 @@ static std::optional<StringRef> getSimpleMacroName(StringRef Macro) {
   return FinishName();
 }
 
-static void canonicalizeDefines(PreprocessorOptions &PPOpts) {
-  using MacroOpt = std::pair<StringRef, std::size_t>;
-  std::vector<MacroOpt> SimpleNames;
-  SimpleNames.reserve(PPOpts.Macros.size());
-  std::size_t Index = 0;
-  for (const auto &M : PPOpts.Macros) {
-    auto SName = getSimpleMacroName(M.first);
-    // Skip optimizing if we can't guarantee we can preserve relative order.
-    if (!SName)
-      return;
-    SimpleNames.emplace_back(*SName, Index);
-    ++Index;
-  }
-
-  llvm::stable_sort(SimpleNames, llvm::less_first());
-  // Keep the last instance of each macro name by going in reverse
-  auto NewEnd = std::unique(
-      SimpleNames.rbegin(), SimpleNames.rend(),
-      [](const MacroOpt &A, const MacroOpt &B) { return A.first == B.first; });
-  SimpleNames.erase(SimpleNames.begin(), NewEnd.base());
-
-  // Apply permutation.
-  decltype(PPOpts.Macros) NewMacros;
-  NewMacros.reserve(SimpleNames.size());
-  for (std::size_t I = 0, E = SimpleNames.size(); I != E; ++I) {
-    std::size_t OriginalIndex = SimpleNames[I].second;
-    // We still emit undefines here as they may be undefining a predefined macro
-    NewMacros.push_back(std::move(PPOpts.Macros[OriginalIndex]));
-  }
-  std::swap(PPOpts.Macros, NewMacros);
-}
-
-class ScanningDependencyDirectivesGetter : public DependencyDirectivesGetter {
-  DependencyScanningWorkerFilesystem *DepFS;
-
-public:
-  ScanningDependencyDirectivesGetter(FileManager &FileMgr) : DepFS(nullptr) {
-    FileMgr.getVirtualFileSystem().visit([&](llvm::vfs::FileSystem &FS) {
-      auto *DFS = llvm::dyn_cast<DependencyScanningWorkerFilesystem>(&FS);
-      if (DFS) {
-        assert(!DepFS && "Found multiple scanning VFSs");
-        DepFS = DFS;
-      }
-    });
-    assert(DepFS && "Did not find scanning VFS");
-  }
-
-  std::unique_ptr<DependencyDirectivesGetter>
-  cloneFor(FileManager &FileMgr) override {
-    return std::make_unique<ScanningDependencyDirectivesGetter>(FileMgr);
-  }
-
-  std::optional<ArrayRef<dependency_directives_scan::Directive>>
-  operator()(FileEntryRef File) override {
-    return DepFS->getDirectiveTokens(File.getName());
-  }
-};
-
 /// A clang tool that runs the preprocessor in a mode that's optimized for
 /// dependency scanning for the given compiler invocation.
 class DependencyScanningAction {
@@ -592,6 +462,120 @@ class DependencyScanningAction {
 
 } // end anonymous namespace
 
+/// Deduce the dependency target based on the output file and input files.
+std::string clang::tooling::dependencies::deduceDepTarget(
+    const std::string &OutputFile,
+    const SmallVectorImpl<FrontendInputFile> &InputFiles) {
+  if (OutputFile != "-")
+    return OutputFile;
+
+  if (InputFiles.empty() || !InputFiles.front().isFile())
+    return "clang-scan-deps\\ dependency";
+
+  return makeObjFileName(InputFiles.front().getFile());
+}
+
+/// Visit the given prebuilt module and collect all of the modules it
+/// transitively imports and contributing input files.
+bool clang::tooling::dependencies::visitPrebuiltModule(
+    StringRef PrebuiltModuleFilename, CompilerInstance &CI,
+    PrebuiltModuleFilesT &ModuleFiles,
+    PrebuiltModulesAttrsMap &PrebuiltModulesASTMap, DiagnosticsEngine &Diags,
+    const ArrayRef<StringRef> StableDirs) {
+  // List of module files to be processed.
+  llvm::SmallVector<std::string> Worklist;
+
+  PrebuiltModuleListener Listener(ModuleFiles, Worklist, PrebuiltModulesASTMap,
+                                  CI.getHeaderSearchOpts(), CI.getLangOpts(),
+                                  Diags, StableDirs);
+
+  Listener.visitModuleFile(PrebuiltModuleFilename,
+                           serialization::MK_ExplicitModule);
+  if (ASTReader::readASTFileControlBlock(
+          PrebuiltModuleFilename, CI.getFileManager(), CI.getModuleCache(),
+          CI.getPCHContainerReader(),
+          /*FindModuleFileExtensions=*/false, Listener,
+          /*ValidateDiagnosticOptions=*/false, ASTReader::ARR_OutOfDate))
+    return true;
+
+  while (!Worklist.empty()) {
+    Listener.visitModuleFile(Worklist.back(), serialization::MK_ExplicitModule);
+    if (ASTReader::readASTFileControlBlock(
+            Worklist.pop_back_val(), CI.getFileManager(), CI.getModuleCache(),
+            CI.getPCHContainerReader(),
+            /*FindModuleFileExtensions=*/false, Listener,
+            /*ValidateDiagnosticOptions=*/false))
+      return true;
+  }
+  return false;
+}
+
+void clang::tooling::dependencies::canonicalizeDefines(
+    PreprocessorOptions &PPOpts) {
+  using MacroOpt = std::pair<StringRef, std::size_t>;
+  std::vector<MacroOpt> SimpleNames;
+  SimpleNames.reserve(PPOpts.Macros.size());
+  std::size_t Index = 0;
+  for (const auto &M : PPOpts.Macros) {
+    auto SName = getSimpleMacroName(M.first);
+    // Skip optimizing if we can't guarantee we can preserve relative order.
+    if (!SName)
+      return;
+    SimpleNames.emplace_back(*SName, Index);
+    ++Index;
+  }
+
+  llvm::stable_sort(SimpleNames, llvm::less_first());
+  // Keep the last instance of each macro name by going in reverse
+  auto NewEnd = std::unique(
+      SimpleNames.rbegin(), SimpleNames.rend(),
+      [](const MacroOpt &A, const MacroOpt &B) { return A.first == B.first; });
+  SimpleNames.erase(SimpleNames.begin(), NewEnd.base());
+
+  // Apply permutation.
+  decltype(PPOpts.Macros) NewMacros;
+  NewMacros.reserve(SimpleNames.size());
+  for (std::size_t I = 0, E = SimpleNames.size(); I != E; ++I) {
+    std::size_t OriginalIndex = SimpleNames[I].second;
+    // We still emit undefines here as they may be undefining a predefined macro
+    NewMacros.push_back(std::move(PPOpts.Macros[OriginalIndex]));
+  }
+  std::swap(PPOpts.Macros, NewMacros);
+}
+
+/// Sanitize diagnostic options for dependency scan.
+void clang::tooling::dependencies::sanitizeDiagOpts(
+    DiagnosticOptions &DiagOpts) {
+  // Don't print 'X warnings and Y errors generated'.
+  DiagOpts.ShowCarets = false;
+  // Don't write out diagnostic file.
+  DiagOpts.DiagnosticSerializationFile.clear();
+  // Don't emit warnings except for scanning specific warnings.
+  // TODO: It would be useful to add a more principled way to ignore all
+  //       warnings that come from source code. The issue is that we need to
+  //       ignore warnings that could be suppressed by
+  //       `#pragma clang diagnostic`, while still allowing some scanning
+  //       warnings for things we're not ready to turn into errors yet.
+  //       See `test/ClangScanDeps/diagnostic-pragmas.c` for an example.
+  llvm::erase_if(DiagOpts.Warnings, [](StringRef Warning) {
+    return llvm::StringSwitch<bool>(Warning)
+        .Cases("pch-vfs-diff", "error=pch-vfs-diff", false)
+        .StartsWith("no-error=", false)
+        .Default(true);
+  });
+}
+
+std::unique_ptr<DiagnosticOptions>
+clang::tooling::dependencies::createDiagOptions(
+    const std::vector<std::string> &CommandLine) {
+  std::vector<const char *> CLI;
+  for (const std::string &Arg : CommandLine)
+    CLI.push_back(Arg.c_str());
+  auto DiagOpts = CreateAndPopulateDiagOpts(CLI);
+  sanitizeDiagOpts(*DiagOpts);
+  return DiagOpts;
+}
+
 DependencyScanningWorker::DependencyScanningWorker(
     DependencyScanningService &Service,
     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
@@ -619,16 +603,6 @@ DependencyScanningWorker::DependencyScanningWorker(
   }
 }
 
-static std::unique_ptr<DiagnosticOptions>
-createDiagOptions(const std::vector<std::string> &CommandLine) {
-  std::vector<const char *> CLI;
-  for (const std::string &Arg : CommandLine)
-    CLI.push_back(Arg.c_str());
-  auto DiagOpts = CreateAndPopulateDiagOpts(CLI);
-  sanitizeDiagOpts(*DiagOpts);
-  return DiagOpts;
-}
-
 llvm::Error DependencyScanningWorker::computeDependencies(
     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
     DependencyConsumer &Consumer, DependencyActionController &Controller,
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index d67178c153e88..263efe62eb179 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -951,17 +951,18 @@ ModuleDepCollector::ModuleDepCollector(
     std::unique_ptr<DependencyOutputOptions> Opts,
     CompilerInstance &ScanInstance, DependencyConsumer &C,
     DependencyActionController &Controller, CompilerInvocation OriginalCI,
-    const PrebuiltModulesAttrsMap PrebuiltModulesASTMap,
+    const PrebuiltModulesAttrsMap &PrebuiltModulesASTMap,
     const ArrayRef<StringRef> StableDirs)
     : Service(Service), ScanInstance(ScanInstance), Consumer(C),
-      Controller(Controller),
-      PrebuiltModulesASTMap(std::move(PrebuiltModulesASTMap)),
+      Controller(Controller), PrebuiltModulesASTMap(PrebuiltModulesASTMap),
       StableDirs(StableDirs), Opts(std::move(Opts)),
       CommonInvocation(
           makeCommonInvocationForModuleBuild(std::move(OriginalCI))) {}
 
 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
-  PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this));
+  auto CollectorPP = std::make_unique<ModuleDepCollectorPP>(*this);
+  CollectorPPPtr = CollectorPP.get();
+  PP.addPPCallbacks(std::move(CollectorPP));
 }
 
 void ModuleDepCollector::attachToASTReader(ASTReader &R) {}

>From 2fa6205a4929e968156c32198b982c7222b9fe05 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Mon, 22 Sep 2025 12:21:11 -0700
Subject: [PATCH 2/5] Teach clang-scan-deps to use CompilerInstanceWithContext.
 All tests passing.

---
 .../DependencyScanningTool.h                  | 10 ++++++++
 .../DependencyScanningWorker.h                | 10 ++++++++
 .../CompilerInstanceWithContext.cpp           |  1 +
 .../DependencyScanningTool.cpp                | 23 +++++++++++++++++++
 .../DependencyScanningWorker.cpp              | 20 ++++++++++++++++
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 20 +++++++++++++---
 6 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
index c3601a4e73e1f..2410c4aa99bcf 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
@@ -161,6 +161,16 @@ class DependencyScanningTool {
 
   llvm::vfs::FileSystem &getWorkerVFS() const { return Worker.getVFS(); }
 
+  /// TODO: add documentation.
+  llvm::Error initializeCompilerInstacneWithContext(
+      StringRef CWD, const std::vector<std::string> &CommandLine);
+
+  llvm::Expected<TranslationUnitDeps> computeDependenciesByNameWithContext(
+      StringRef ModuleName, const llvm::DenseSet<ModuleID> &AlreadySeen,
+      LookupModuleOutputCallback LookupModuleOutput);
+
+  llvm::Error finalizeCompilerInstanceWithContext();
+
 private:
   DependencyScanningWorker Worker;
 };
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
index fa38197e39097..d61b2a6d8024a 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -106,6 +106,10 @@ class DependencyScanningWorker {
   DependencyScanningWorker(DependencyScanningService &Service,
                            llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
 
+  llvm::Error initializeCompierInstanceWithContext(
+      StringRef CWD, const std::vector<std::string> &CommandLine);
+  llvm::Error finalizeCompilerInstanceWithContext();
+
   /// Run the dependency scanning tool for a given clang driver command-line,
   /// and report the discovered dependencies to the provided consumer. If
   /// TUBuffer is not nullopt, it is used as TU input for the dependency
@@ -153,6 +157,12 @@ class DependencyScanningWorker {
                                   DependencyActionController &Controller,
                                   StringRef ModuleName);
 
+  /// TODO: add documentation
+  llvm::Error
+  computeDependenciesByNameWithContext(StringRef ModuleName,
+                                       DependencyConsumer &Consumer,
+                                       DependencyActionController &Controller);
+
   llvm::vfs::FileSystem &getVFS() const { return *BaseFS; }
 
 private:
diff --git a/clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp b/clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp
index 7406509ce7bba..172a81003f7ba 100644
--- a/clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp
+++ b/clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp
@@ -104,6 +104,7 @@ llvm::Error CompilerInstanceWithContext::initialize() {
   // we enable CAS.
   // CI.getInvocation().getCASOpts() = Worker.CASOpts;
   CI.setBuildingModule(false);
+  CI.createVirtualFileSystem(OverlayFS, Diags->getClient());
   sanitizeDiagOpts(CI.getDiagnosticOpts());
   CI.createDiagnostics(DiagPrinter.get(), false);
   CI.getPreprocessorOpts().AllowPCHWithDifferentModulesCachePath = true;
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
index 27734ffd0e20b..bad35e6999f04 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
@@ -169,6 +169,29 @@ DependencyScanningTool::getModuleDependencies(
   return Consumer.takeTranslationUnitDeps();
 }
 
+llvm::Error DependencyScanningTool::initializeCompilerInstacneWithContext(
+    StringRef CWD, const std::vector<std::string> &CommandLine) {
+  return Worker.initializeCompierInstanceWithContext(CWD, CommandLine);
+}
+
+llvm::Expected<TranslationUnitDeps>
+DependencyScanningTool::computeDependenciesByNameWithContext(
+    StringRef ModuleName, const llvm::DenseSet<ModuleID> &AlreadySeen,
+    LookupModuleOutputCallback LookupModuleOutput) {
+  FullDependencyConsumer Consumer(AlreadySeen);
+  CallbackActionController Controller(LookupModuleOutput);
+  llvm::Error Result = Worker.computeDependenciesByNameWithContext(
+      ModuleName, Consumer, Controller);
+  if (Result)
+    return std::move(Result);
+
+  return Consumer.takeTranslationUnitDeps();
+}
+
+llvm::Error DependencyScanningTool::finalizeCompilerInstanceWithContext() {
+  return Worker.finalizeCompilerInstanceWithContext();
+}
+
 TranslationUnitDeps FullDependencyConsumer::takeTranslationUnitDeps() {
   TranslationUnitDeps TU;
 
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index a36635cba2b96..4344ecc320fc2 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -603,6 +603,19 @@ DependencyScanningWorker::DependencyScanningWorker(
   }
 }
 
+llvm::Error DependencyScanningWorker::initializeCompierInstanceWithContext(
+    StringRef CWD, const std::vector<std::string> &CommandLine) {
+  CIWithContext =
+      std::make_unique<CompilerInstanceWithContext>(*this, CWD, CommandLine);
+  return CIWithContext->initialize();
+}
+
+llvm::Error DependencyScanningWorker::finalizeCompilerInstanceWithContext() {
+  llvm::Error E = CIWithContext->finalize();
+  CIWithContext.reset();
+  return E;
+}
+
 llvm::Error DependencyScanningWorker::computeDependencies(
     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
     DependencyConsumer &Consumer, DependencyActionController &Controller,
@@ -833,4 +846,11 @@ bool DependencyScanningWorker::computeDependencies(
                           Controller, DC, OverlayFS, ModuleName);
 }
 
+llvm::Error DependencyScanningWorker::computeDependenciesByNameWithContext(
+    StringRef ModuleName, DependencyConsumer &Consumer,
+    DependencyActionController &Controller) {
+  assert(CIWithContext && "CompilerInstance with context required!");
+  return CIWithContext->computeDependencies(ModuleName, Consumer, Controller);
+}
+
 DependencyActionController::~DependencyActionController() {}
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 0e2758d123edc..3ef971dec66dd 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -1075,12 +1075,26 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
             HadErrors = true;
         }
       } else if (ModuleName) {
-        auto MaybeModuleDepsGraph = WorkerTool.getModuleDependencies(
-            *ModuleName, Input->CommandLine, CWD, AlreadySeenModules,
-            LookupOutput);
+        if (llvm::Error Err = WorkerTool.initializeCompilerInstacneWithContext(
+                CWD, Input->CommandLine)) {
+          llvm::errs() << "ERROR: compiler instance with context setup error "
+                       << Err << "\n";
+          HadErrors = true;
+          continue;
+        }
+        auto MaybeModuleDepsGraph =
+            WorkerTool.computeDependenciesByNameWithContext(
+                *ModuleName, AlreadySeenModules, LookupOutput);
         if (handleModuleResult(*ModuleName, MaybeModuleDepsGraph, *FD,
                                LocalIndex, DependencyOS, Errs))
           HadErrors = true;
+        if (llvm::Error Err =
+                WorkerTool.finalizeCompilerInstanceWithContext()) {
+          llvm::errs()
+              << "ERROR: compiler instance with context finialization error "
+              << Err << "\n";
+          HadErrors = true;
+        }
       } else {
         std::unique_ptr<llvm::MemoryBuffer> TU;
         std::optional<llvm::MemoryBufferRef> TUBuffer;

>From ee619a82200ebe8ffeb32c1d8154616d2f0329e9 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Mon, 22 Sep 2025 13:54:59 -0700
Subject: [PATCH 3/5] Reorganize some code and adding some documentation.

---
 .../DependencyScanningTool.h                  | 29 ++++++++++++++++++-
 .../DependencyScanningWorker.h                | 28 ++++++++++++++----
 .../DependencyScanningWorker.cpp              | 26 ++++++++---------
 3 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
index 2410c4aa99bcf..109330aa8f20c 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
@@ -161,14 +161,41 @@ class DependencyScanningTool {
 
   llvm::vfs::FileSystem &getWorkerVFS() const { return Worker.getVFS(); }
 
-  /// TODO: add documentation.
+  /// The following three methods provide a new interface to perform
+  /// a by-name dependency scan. The new interface's intention is to improve
+  /// dependency scanning performance when a sequence of names is looked up
+  /// with the same current working directory and the same command line.
+
+  /// @brief Initializes the context and the compiler instance for scanning.
+  ///        This method must be called before performing scanning.
+  /// @param CWD The current working directory used during the scan.
+  /// @param CommandLine The command line used for the scan.
+  /// @return Error if the initialization fails.
   llvm::Error initializeCompilerInstacneWithContext(
       StringRef CWD, const std::vector<std::string> &CommandLine);
 
+  /// @brief Computes the dependencies for the module named ModuleName.
+  /// @param ModuleName The name of the module for which this method computes
+  ///                   dependencies.
+  /// @param AlreadySeen This stores modules which have previously been
+  ///                    reported. Use the same instance for all calls to this
+  ///                    function for a single \c DependencyScanningTool in a
+  ///                    single build. Note that this parameter is not part of
+  ///                    the context because it can be shared across different
+  ///                    worker threads and each worker thread may update it.
+  /// @param LookupModuleOutput This function is called to fill in
+  ///                           "-fmodule-file=", "-o" and other output
+  ///                           arguments for dependencies.
+  /// @return An instance of \c TranslationUnitDeps if the scan is successful.
+  ///         Otherwise it returns an error.
   llvm::Expected<TranslationUnitDeps> computeDependenciesByNameWithContext(
       StringRef ModuleName, const llvm::DenseSet<ModuleID> &AlreadySeen,
       LookupModuleOutputCallback LookupModuleOutput);
 
+  /// @brief This method finalizes the compiler instance. It finalizes the
+  ///        diagnostics and deletes the compiler instance. Call this method
+  ///        once all names for the same command line are scanned.
+  /// @return Error if an error occurred during finalization.
   llvm::Error finalizeCompilerInstanceWithContext();
 
 private:
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
index d61b2a6d8024a..d34489c568393 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -106,10 +106,6 @@ class DependencyScanningWorker {
   DependencyScanningWorker(DependencyScanningService &Service,
                            llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
 
-  llvm::Error initializeCompierInstanceWithContext(
-      StringRef CWD, const std::vector<std::string> &CommandLine);
-  llvm::Error finalizeCompilerInstanceWithContext();
-
   /// Run the dependency scanning tool for a given clang driver command-line,
   /// and report the discovered dependencies to the provided consumer. If
   /// TUBuffer is not nullopt, it is used as TU input for the dependency
@@ -157,12 +153,34 @@ class DependencyScanningWorker {
                                   DependencyActionController &Controller,
                                   StringRef ModuleName);
 
-  /// TODO: add documentation
+  /// The three methods below implement a new interface for by-name
+  /// dependency scanning. Together they enable the dependency scanning worker
+  /// to more effectively perform scanning for a sequence of modules
+  /// by name when the CWD and CommandLine are held constant.
+
+  /// @brief Initializes the context and the compiler instance for scanning.
+  /// @param CWD The current working directory used during the scan.
+  /// @param CommandLine The command line used for the scan.
+  /// @return Error if the initialization fails.
+  llvm::Error initializeCompierInstanceWithContext(
+      StringRef CWD, const std::vector<std::string> &CommandLine);
+
+  /// @brief Performs dependency scanning for the module whose name is
+  ///        specified.
+  /// @param ModuleName  The name of the module whose dependency will be
+  ///                    scanned.
+  /// @param Consumer The dependency consumer that stores the results.
+  /// @param Controller The controller for the dependency scanning action.
+  /// @return Error if the scanner incurs errors.
   llvm::Error
   computeDependenciesByNameWithContext(StringRef ModuleName,
                                        DependencyConsumer &Consumer,
                                        DependencyActionController &Controller);
 
+  /// @brief Finalizes the diagnostics engine and deletes the compiler instance.
+  /// @return Error if errors occur during finalization.
+  llvm::Error finalizeCompilerInstanceWithContext();
+
   llvm::vfs::FileSystem &getVFS() const { return *BaseFS; }
 
 private:
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 4344ecc320fc2..a2b2da62e1864 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -603,19 +603,6 @@ DependencyScanningWorker::DependencyScanningWorker(
   }
 }
 
-llvm::Error DependencyScanningWorker::initializeCompierInstanceWithContext(
-    StringRef CWD, const std::vector<std::string> &CommandLine) {
-  CIWithContext =
-      std::make_unique<CompilerInstanceWithContext>(*this, CWD, CommandLine);
-  return CIWithContext->initialize();
-}
-
-llvm::Error DependencyScanningWorker::finalizeCompilerInstanceWithContext() {
-  llvm::Error E = CIWithContext->finalize();
-  CIWithContext.reset();
-  return E;
-}
-
 llvm::Error DependencyScanningWorker::computeDependencies(
     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
     DependencyConsumer &Consumer, DependencyActionController &Controller,
@@ -846,6 +833,13 @@ bool DependencyScanningWorker::computeDependencies(
                           Controller, DC, OverlayFS, ModuleName);
 }
 
+llvm::Error DependencyScanningWorker::initializeCompierInstanceWithContext(
+    StringRef CWD, const std::vector<std::string> &CommandLine) {
+  CIWithContext =
+      std::make_unique<CompilerInstanceWithContext>(*this, CWD, CommandLine);
+  return CIWithContext->initialize();
+}
+
 llvm::Error DependencyScanningWorker::computeDependenciesByNameWithContext(
     StringRef ModuleName, DependencyConsumer &Consumer,
     DependencyActionController &Controller) {
@@ -853,4 +847,10 @@ llvm::Error DependencyScanningWorker::computeDependenciesByNameWithContext(
   return CIWithContext->computeDependencies(ModuleName, Consumer, Controller);
 }
 
+llvm::Error DependencyScanningWorker::finalizeCompilerInstanceWithContext() {
+  llvm::Error E = CIWithContext->finalize();
+  CIWithContext.reset();
+  return E;
+}
+
 DependencyActionController::~DependencyActionController() {}

>From 12ce2e636c9196c012ac5496f3e7cd2d883863e8 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Mon, 22 Sep 2025 15:03:21 -0700
Subject: [PATCH 4/5] Clean up error handling in clang-scan-deps.

---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 23 +++++++++++++++----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 3ef971dec66dd..5e23dce68a8d3 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -661,6 +661,18 @@ static bool handleModuleResult(StringRef ModuleName,
   return false;
 }
 
+static void handleCompilerInstanceWithContextError(StringRef Info,
+                                                   llvm::Error E,
+                                                   SharedStream &OS,
+                                                   SharedStream &Errs) {
+  llvm::handleAllErrors(std::move(E), [&Info, &Errs](llvm::StringError &Err) {
+    Errs.applyLocked([&](raw_ostream &OS) {
+      OS << "Error: " << Info << ":\n";
+      OS << Err.getMessage();
+    });
+  });
+}
+
 class P1689Deps {
 public:
   void printDependencies(raw_ostream &OS) {
@@ -1077,8 +1089,9 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
       } else if (ModuleName) {
         if (llvm::Error Err = WorkerTool.initializeCompilerInstacneWithContext(
                 CWD, Input->CommandLine)) {
-          llvm::errs() << "ERROR: compiler instance with context setup error "
-                       << Err << "\n";
+          handleCompilerInstanceWithContextError(
+              "Compiler instance with context setup error", std::move(Err),
+              DependencyOS, Errs);
           HadErrors = true;
           continue;
         }
@@ -1090,9 +1103,9 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
           HadErrors = true;
         if (llvm::Error Err =
                 WorkerTool.finalizeCompilerInstanceWithContext()) {
-          llvm::errs()
-              << "ERROR: compiler instance with context finialization error "
-              << Err << "\n";
+          handleCompilerInstanceWithContextError(
+              "Compiler instance with context finialization error",
+              std::move(Err), DependencyOS, Errs);
           HadErrors = true;
         }
       } else {

>From 63ce4f58953e15b06057f2189a29451c3208dd59 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Tue, 23 Sep 2025 11:03:47 -0700
Subject: [PATCH 5/5] Address code review.

---
 .../CompilerInstanceWithContext.h             |  1 -
 .../CompilerInstanceWithContext.cpp           | 19 ++++++++++++++-----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/CompilerInstanceWithContext.h b/clang/include/clang/Tooling/DependencyScanning/CompilerInstanceWithContext.h
index 5a2cb25d9d972..c52807c3531b0 100644
--- a/clang/include/clang/Tooling/DependencyScanning/CompilerInstanceWithContext.h
+++ b/clang/include/clang/Tooling/DependencyScanning/CompilerInstanceWithContext.h
@@ -65,7 +65,6 @@ class CompilerInstanceWithContext {
   PrebuiltModulesAttrsMap PrebuiltModuleVFSMap;
 
   // Compiler Instance
-  IntrusiveRefCntPtr<ModuleCache> ModCache;
   std::unique_ptr<CompilerInstance> CIPtr;
 
   //   // Source location offset.
diff --git a/clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp b/clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp
index 172a81003f7ba..d3a7343ad63d6 100644
--- a/clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp
+++ b/clang/lib/Tooling/DependencyScanning/CompilerInstanceWithContext.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Tooling/DependencyScanning/CompilerInstanceWithContext.h"
+#include "clang/Basic/DiagnosticFrontend.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
@@ -85,8 +86,16 @@ llvm::Error CompilerInstanceWithContext::initialize() {
         "Incorrect compilation command, missing cc1",
         llvm::inconvertibleErrorCode());
   Invocation = std::make_unique<CompilerInvocation>();
-  CompilerInvocation::CreateFromArgs(*Invocation, Command.getArguments(),
-                                     *Diags, Command.getExecutable());
+
+  if (!CompilerInvocation::CreateFromArgs(*Invocation, Command.getArguments(),
+                                          *Diags, Command.getExecutable())) {
+    Diags->Report(diag::err_fe_expected_compiler_job)
+        << llvm::join(CommandLine, " ");
+    return llvm::make_error<llvm::StringError>(
+        "Cannot create CompilerInvocation from Args",
+        llvm::inconvertibleErrorCode());
+  }
+
   Invocation->getFrontendOpts().DisableFree = false;
   Invocation->getCodeGenOpts().DisableFree = false;
 
@@ -94,7 +103,8 @@ llvm::Error CompilerInstanceWithContext::initialize() {
     canonicalizeDefines(Invocation->getPreprocessorOpts());
 
   // Create the CompilerInstance.
-  ModCache = makeInProcessModuleCache(Worker.Service.getModuleCacheEntries());
+  IntrusiveRefCntPtr<ModuleCache> ModCache =
+      makeInProcessModuleCache(Worker.Service.getModuleCacheEntries());
   CIPtr = std::make_unique<CompilerInstance>(
       std::make_shared<CompilerInvocation>(*Invocation), Worker.PCHContainerOps,
       ModCache.get());
@@ -126,8 +136,6 @@ llvm::Error CompilerInstanceWithContext::initialize() {
     CI.getHeaderSearchOpts().BuildSessionTimestamp =
         Worker.Service.getBuildSessionTimestamp();
 
-  CI.setDiagnostics(Diags.get());
-
   auto *FileMgr = CI.createFileManager();
 
   if (Worker.DepFS) {
@@ -232,6 +240,7 @@ llvm::Error CompilerInstanceWithContext::computeDependencies(
   }
 
   MDC->applyDiscoveredDependencies(Inv);
+  Consumer.handleBuildCommand({CommandLine[0], Inv.getCC1CommandLine()});
 
   // TODO: enable CAS
   //   std::string ID = Inv.getFileSystemOpts().CASFileSystemRootID;



More information about the cfe-commits mailing list