[clang] 8681470 - [clang][Dependency Scanning] Fix the In-Memory Buffer Used for By-Name Scanning (#183396)

via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 6 15:15:32 PST 2026


Author: Qiongsi Wu
Date: 2026-03-06T15:15:27-08:00
New Revision: 868147026258b982a8089701dfae531f9ebbd464

URL: https://github.com/llvm/llvm-project/commit/868147026258b982a8089701dfae531f9ebbd464
DIFF: https://github.com/llvm/llvm-project/commit/868147026258b982a8089701dfae531f9ebbd464.diff

LOG: [clang][Dependency Scanning] Fix the In-Memory Buffer Used for By-Name Scanning (#183396)

This PR fixes two issues of the in-memory buffer we use for the input
file when a dependency scanner performs by-name queries.

First, it renames the buffer. The temporary file was named
`ScanningByName-%%%%%%%%.input`, which leads to weird diagnostics such
as
```
ScanningByName-2d42a1e9.input:1:1: fatal error: could not build module 'X'
```

This PR changes the name of the file buffer, so we get diagnostics such
as
```
module-include.input:1:1: fatal error: could not build module 'X'
```
which is more indicative. 

Additionally, this PR fixes a bug where the source location may overflow
the temporary buffer by creating a 64k empty string which the temporary
buffer occupies. When the source location overflows, the diagnostics
could point to some random file that comes after the fake file and is
incorrect.

Currently, the maximum number of unique names from Apple's SDKs is
around 3000. A 64k buffer per dependency scanning worker gives us around
20x capacity per worker (which scans fewer names than 3000 when the
scanning is done in parallel). A fatal error is added to catch
overflows.

Added: 
    

Modified: 
    clang/include/clang/DependencyScanning/DependencyScannerImpl.h
    clang/include/clang/DependencyScanning/DependencyScanningWorker.h
    clang/include/clang/Tooling/DependencyScanningTool.h
    clang/lib/DependencyScanning/DependencyScanningWorker.cpp
    clang/lib/Tooling/DependencyScanningTool.cpp
    clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/DependencyScanning/DependencyScannerImpl.h b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h
index 24cf1afaaa450..a54a6269dbdc4 100644
--- a/clang/include/clang/DependencyScanning/DependencyScannerImpl.h
+++ b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h
@@ -16,7 +16,6 @@
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
-#include "clang/Serialization/ObjectFilePCHContainerReader.h"
 #include "llvm/Support/VirtualFileSystem.h"
 
 namespace clang {

diff  --git a/clang/include/clang/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/DependencyScanning/DependencyScanningWorker.h
index 0a5c4936f7fe7..92da219d85d56 100644
--- a/clang/include/clang/DependencyScanning/DependencyScanningWorker.h
+++ b/clang/include/clang/DependencyScanning/DependencyScanningWorker.h
@@ -140,20 +140,6 @@ class DependencyScanningWorker {
 
   friend tooling::CompilerInstanceWithContext;
 };
-
-std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>,
-          std::vector<std::string>>
-initVFSForTUBufferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
-                           ArrayRef<std::string> CommandLine,
-                           StringRef WorkingDirectory,
-                           llvm::MemoryBufferRef TUBuffer);
-
-std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>,
-          std::vector<std::string>>
-initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
-                         ArrayRef<std::string> CommandLine,
-                         StringRef WorkingDirectory, StringRef ModuleName);
-
 } // end namespace dependencies
 } // end namespace clang
 

diff  --git a/clang/include/clang/Tooling/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanningTool.h
index 40e791fd5bff5..30846ae0ebf3e 100644
--- a/clang/include/clang/Tooling/DependencyScanningTool.h
+++ b/clang/include/clang/Tooling/DependencyScanningTool.h
@@ -226,6 +226,16 @@ class CompilerInstanceWithContext {
       StringRef ModuleName,
       const llvm::DenseSet<dependencies::ModuleID> &AlreadySeen,
       dependencies::LookupModuleOutputCallback LookupModuleOutput);
+
+  // MaxNumOfQueries is the upper limit of the number of names the by-name
+  // scanning API (computeDependencies) can support after a
+  // CompilerInstanceWithContext is initialized. At the time of this commit, the
+  // estimated number of total unique importable names is around 3000 from
+  // Apple's SDKs. We usually import them in parallel, so it is unlikely that
+  // all names are all scanned by the same dependency scanning worker. Therefore
+  // the 64k (20x bigger than our estimate) size is sufficient to hold the
+  // unique source locations to report diagnostics per worker.
+  static const int32_t MaxNumOfQueries = 1 << 16;
 };
 
 } // end namespace tooling

diff  --git a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
index 7a7f34a43b338..75eb821ac651a 100644
--- a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
@@ -108,58 +108,3 @@ bool DependencyScanningWorker::computeDependencies(
 
   return Success && Action.hasScanned();
 }
-
-std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>,
-          std::vector<std::string>>
-dependencies::initVFSForTUBufferScanning(
-    IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
-    ArrayRef<std::string> CommandLine, StringRef WorkingDirectory,
-    llvm::MemoryBufferRef TUBuffer) {
-  // Reset what might have been modified in the previous worker invocation.
-  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
-
-  auto OverlayFS =
-      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
-  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
-  InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
-  auto InputPath = TUBuffer.getBufferIdentifier();
-  InMemoryFS->addFile(
-      InputPath, 0, llvm::MemoryBuffer::getMemBufferCopy(TUBuffer.getBuffer()));
-  IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
-
-  OverlayFS->pushOverlay(InMemoryOverlay);
-  std::vector<std::string> ModifiedCommandLine(CommandLine);
-  ModifiedCommandLine.emplace_back(InputPath);
-
-  return std::make_pair(OverlayFS, ModifiedCommandLine);
-}
-
-std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>,
-          std::vector<std::string>>
-dependencies::initVFSForByNameScanning(
-    IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
-    ArrayRef<std::string> CommandLine, StringRef WorkingDirectory,
-    StringRef ModuleName) {
-  // Reset what might have been modified in the previous worker invocation.
-  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
-
-  // If we're scanning based on a module name alone, we don't expect the client
-  // to provide us with an input file. However, the driver really wants to have
-  // one. Let's just make it up to make the driver happy.
-  auto OverlayFS =
-      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
-  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
-  InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
-  SmallString<128> FakeInputPath;
-  // TODO: We should retry the creation if the path already exists.
-  llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath,
-                                  /*MakeAbsolute=*/false);
-  InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
-  IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
-  OverlayFS->pushOverlay(InMemoryOverlay);
-
-  std::vector<std::string> ModifiedCommandLine(CommandLine);
-  ModifiedCommandLine.emplace_back(FakeInputPath);
-
-  return std::make_pair(OverlayFS, ModifiedCommandLine);
-}

diff  --git a/clang/lib/Tooling/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanningTool.cpp
index b570bc8e5ae30..147588eff59c7 100644
--- a/clang/lib/Tooling/DependencyScanningTool.cpp
+++ b/clang/lib/Tooling/DependencyScanningTool.cpp
@@ -253,6 +253,65 @@ std::optional<P1689Rule> DependencyScanningTool::getP1689ModuleDependencyFile(
   return Rule;
 }
 
+static std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>,
+                 std::vector<std::string>>
+initVFSForTUBufferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
+                           ArrayRef<std::string> CommandLine,
+                           StringRef WorkingDirectory,
+                           llvm::MemoryBufferRef TUBuffer) {
+  // Reset what might have been modified in the previous worker invocation.
+  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+
+  auto OverlayFS =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
+  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
+  auto InputPath = TUBuffer.getBufferIdentifier();
+  InMemoryFS->addFile(
+      InputPath, 0, llvm::MemoryBuffer::getMemBufferCopy(TUBuffer.getBuffer()));
+  IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
+
+  OverlayFS->pushOverlay(InMemoryOverlay);
+  std::vector<std::string> ModifiedCommandLine(CommandLine);
+  ModifiedCommandLine.emplace_back(InputPath);
+
+  return std::make_pair(OverlayFS, ModifiedCommandLine);
+}
+
+// The fake input buffer is read-only, and it is used to produce
+// unique source locations for the diagnostics. Therefore sharing
+// this global buffer across threads is ok.
+static const std::string
+    FakeInput(" ",
+              clang::tooling::CompilerInstanceWithContext::MaxNumOfQueries);
+
+static std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>,
+                 std::vector<std::string>>
+initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
+                         ArrayRef<std::string> CommandLine,
+                         StringRef WorkingDirectory) {
+  // Reset what might have been modified in the previous worker invocation.
+  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+
+  // If we're scanning based on a module name alone, we don't expect the client
+  // to provide us with an input file. However, the driver really wants to have
+  // one. Let's just make it up to make the driver happy.
+  auto OverlayFS =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
+  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
+  StringRef FakeInputPath("module-include.input");
+  InMemoryFS->addFile(FakeInputPath, 0,
+                      llvm::MemoryBuffer::getMemBuffer(FakeInput));
+  IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
+  OverlayFS->pushOverlay(InMemoryOverlay);
+
+  std::vector<std::string> ModifiedCommandLine(CommandLine);
+  ModifiedCommandLine.emplace_back(FakeInputPath);
+
+  return std::make_pair(OverlayFS, ModifiedCommandLine);
+}
+
 std::optional<TranslationUnitDeps>
 DependencyScanningTool::getTranslationUnitDependencies(
     ArrayRef<std::string> CommandLine, StringRef CWD,
@@ -320,8 +379,8 @@ std::optional<CompilerInstanceWithContext>
 CompilerInstanceWithContext::initializeFromCommandline(
     DependencyScanningTool &Tool, StringRef CWD,
     ArrayRef<std::string> CommandLine, DiagnosticConsumer &DC) {
-  auto [OverlayFS, ModifiedCommandLine] = initVFSForByNameScanning(
-      &Tool.Worker.getVFS(), CommandLine, CWD, "ScanningByName");
+  auto [OverlayFS, ModifiedCommandLine] =
+      initVFSForByNameScanning(&Tool.Worker.getVFS(), CommandLine, CWD);
   auto DiagEngineWithCmdAndOpts =
       std::make_unique<DiagnosticsEngineWithDiagOpts>(ModifiedCommandLine,
                                                       OverlayFS, DC);
@@ -446,6 +505,9 @@ bool CompilerInstanceWithContext::initialize(
 bool CompilerInstanceWithContext::computeDependencies(
     StringRef ModuleName, DependencyConsumer &Consumer,
     DependencyActionController &Controller) {
+  if (SrcLocOffset >= MaxNumOfQueries)
+    llvm::report_fatal_error("exceeded maximum by-name scans for worker");
+
   assert(CIPtr && "CIPtr must be initialized before calling this method");
   auto &CI = *CIPtr;
 

diff  --git a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c
index 9889982354e90..f99690c3a7715 100644
--- a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c
+++ b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c
@@ -27,11 +27,11 @@ module root2 { header "root2.h" }
 // RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
 
 // ERROR: Error while scanning dependencies for modA:
-// ERROR-NEXT: {{.*}}: fatal error: module 'modA' not found
+// ERROR-NEXT: module-include.input:1:1: fatal error: module 'modA' not found
 // ERROR-NEXT: Error while scanning dependencies for modB:
-// ERROR-NEXT: {{.*}}: fatal error: module 'modB' not found
+// ERROR-NEXT: module-include.input:1:3: fatal error: module 'modB' not found
 // ERROR-NEXT: Error while scanning dependencies for modC:
-// ERROR-NEXT: {{.*}}: fatal error: module 'modC' not found
+// ERROR-NEXT: module-include.input:1:4: fatal error: module 'modC' not found
 // CHECK:      {
 // CHECK-NEXT:   "modules": [
 // CHECK-NEXT:     {


        


More information about the cfe-commits mailing list