[clang] [clang][Dependency Scanning] Fix the In-Memory Buffer Used for By-Name Scanning (PR #183396)
Qiongsi Wu via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 4 09:14:29 PST 2026
https://github.com/qiongsiwu updated https://github.com/llvm/llvm-project/pull/183396
>From f3d4a8afbb342afd2b2e24af2368a9a9c9d4fed4 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Wed, 25 Feb 2026 13:34:42 -0800
Subject: [PATCH 1/5] Rename the temporary file the compiler creates for
by-name dependency scanning.
---
.../clang/DependencyScanning/DependencyScanningWorker.h | 2 +-
clang/lib/DependencyScanning/DependencyScanningWorker.cpp | 8 ++++----
clang/lib/Tooling/DependencyScanningTool.cpp | 4 ++--
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/clang/include/clang/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/DependencyScanning/DependencyScanningWorker.h
index 9267c25c93ee9..ee67b2e16eb77 100644
--- a/clang/include/clang/DependencyScanning/DependencyScanningWorker.h
+++ b/clang/include/clang/DependencyScanning/DependencyScanningWorker.h
@@ -189,7 +189,7 @@ std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>,
std::vector<std::string>>
initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
ArrayRef<std::string> CommandLine,
- StringRef WorkingDirectory, StringRef ModuleName);
+ StringRef WorkingDirectory);
} // end namespace dependencies
} // end namespace clang
diff --git a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
index 4c24642c437c3..cb9455263d63d 100644
--- a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
@@ -112,7 +112,7 @@ bool DependencyScanningWorker::computeDependencies(
bool DependencyScanningWorker::initializeCompilerInstanceWithContext(
StringRef CWD, ArrayRef<std::string> CommandLine, DiagnosticConsumer &DC) {
auto [OverlayFS, ModifiedCommandLine] =
- initVFSForByNameScanning(DepFS, CommandLine, CWD, "ScanningByName");
+ initVFSForByNameScanning(DepFS, CommandLine, CWD);
auto DiagEngineWithCmdAndOpts =
std::make_unique<DiagnosticsEngineWithDiagOpts>(ModifiedCommandLine,
OverlayFS, DC);
@@ -166,8 +166,7 @@ std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>,
std::vector<std::string>>
dependencies::initVFSForByNameScanning(
IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
- ArrayRef<std::string> CommandLine, StringRef WorkingDirectory,
- StringRef ModuleName) {
+ ArrayRef<std::string> CommandLine, StringRef WorkingDirectory) {
// Reset what might have been modified in the previous worker invocation.
BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
@@ -180,7 +179,8 @@ dependencies::initVFSForByNameScanning(
InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
SmallString<128> FakeInputPath;
// TODO: We should retry the creation if the path already exists.
- llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath,
+ llvm::sys::fs::createUniquePath("module-includes-%%%%%%%%.input",
+ FakeInputPath,
/*MakeAbsolute=*/false);
InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
diff --git a/clang/lib/Tooling/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanningTool.cpp
index 26f07286f18c4..fadef8a27aa2b 100644
--- a/clang/lib/Tooling/DependencyScanningTool.cpp
+++ b/clang/lib/Tooling/DependencyScanningTool.cpp
@@ -324,8 +324,8 @@ bool DependencyScanningTool::initializeWorkerCIWithContextFromCommandline(
// The input command line is either a driver-style command line, or
// ill-formed. In this case, we will first call the Driver to build a -cc1
// command line for this compilation or diagnose any ill-formed input.
- auto [OverlayFS, ModifiedCommandLine] = initVFSForByNameScanning(
- &Worker.getVFS(), CommandLine, CWD, "ScanningByName");
+ auto [OverlayFS, ModifiedCommandLine] =
+ initVFSForByNameScanning(&Worker.getVFS(), CommandLine, CWD);
auto DiagEngineWithCmdAndOpts =
std::make_unique<DiagnosticsEngineWithDiagOpts>(ModifiedCommandLine,
OverlayFS, DC);
>From 7ce01879b63a363eef6d44c1db25809a5b5f2e1c Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Mon, 2 Mar 2026 16:37:30 -0800
Subject: [PATCH 2/5] Fixing potential source location overflow. And reduce the
length of the random string since we do not need it that wide.
---
.../clang/DependencyScanning/DependencyScannerImpl.h | 10 ++++++++++
.../lib/DependencyScanning/DependencyScannerImpl.cpp | 8 ++++++++
.../DependencyScanning/DependencyScanningWorker.cpp | 12 +++++++++---
.../modules-full-by-mult-mod-names-diagnostics.c | 6 +++---
4 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/clang/include/clang/DependencyScanning/DependencyScannerImpl.h b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h
index 4210c20942a32..27bf1ce088ac8 100644
--- a/clang/include/clang/DependencyScanning/DependencyScannerImpl.h
+++ b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h
@@ -154,6 +154,16 @@ class CompilerInstanceWithContext {
IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFS);
bool computeDependencies(StringRef ModuleName, DependencyConsumer &Consumer,
DependencyActionController &Controller);
+
+ // FIXME: see if we can use a smaller fake buffer, or set the buffer's size
+ // dynamically.
+ // At the time of this commit, the estimated number of total unique importable
+ // names is around 3000 from Apple's SDKs. We usually import them in parallel,
+ // so it is unlikely that all names are all scanned by the same dependency
+ // scanning worker. Therefore the 64k (20x bigger than our estimate) size is
+ // sufficient to hold the unique source locations to report diagnostics per
+ // worker.
+ static const int32_t FakeInputBufferSize = 1 << 16;
};
} // namespace dependencies
} // namespace clang
diff --git a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp
index 3e680f4b4b363..7708477d7abd2 100644
--- a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp
+++ b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp
@@ -936,6 +936,14 @@ bool CompilerInstanceWithContext::computeDependencies(
// FIXME: Scan modules asynchronously here as well.
SrcLocOffset++;
+ if (SrcLocOffset >= FakeInputBufferSize) {
+ // This should never happen. We could have replaced this by an assert,
+ // but reporting a fatal error is more explicit, and we know
+ // immediately we are overflowing.
+ llvm::report_fatal_error("exceeded maximum by-name scans per worker; "
+ "increase module-import-by-name.input's size");
+ }
+
SmallVector<IdentifierLoc, 2> Path;
IdentifierInfo *ModuleID = PP.getIdentifierInfo(ModuleName);
Path.emplace_back(IDLocation, ModuleID);
diff --git a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
index cb9455263d63d..3e89f543b75ce 100644
--- a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
@@ -162,6 +162,12 @@ dependencies::initVFSForTUBufferScanning(
return std::make_pair(OverlayFS, ModifiedCommandLine);
}
+// The fake input buffer is read-only, and it is used to produce
+// unique source locations for the diagnostics. Therefore sharing
+// this global buffer across threads is ok.
+static const std::string
+ FakeInput(" ", CompilerInstanceWithContext::FakeInputBufferSize);
+
std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>,
std::vector<std::string>>
dependencies::initVFSForByNameScanning(
@@ -179,10 +185,10 @@ dependencies::initVFSForByNameScanning(
InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
SmallString<128> FakeInputPath;
// TODO: We should retry the creation if the path already exists.
- llvm::sys::fs::createUniquePath("module-includes-%%%%%%%%.input",
- FakeInputPath,
+ llvm::sys::fs::createUniquePath("module-import-%%%%.input", FakeInputPath,
/*MakeAbsolute=*/false);
- InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
+ InMemoryFS->addFile(FakeInputPath, 0,
+ llvm::MemoryBuffer::getMemBuffer(FakeInput));
IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
OverlayFS->pushOverlay(InMemoryOverlay);
diff --git a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c
index 9889982354e90..fa93f5fc92fd7 100644
--- a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c
+++ b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c
@@ -27,11 +27,11 @@ module root2 { header "root2.h" }
// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
// ERROR: Error while scanning dependencies for modA:
-// ERROR-NEXT: {{.*}}: fatal error: module 'modA' not found
+// ERROR-NEXT: module-import-[[RAND:[a-zA-Z0-9]{4}]].input:1:1: fatal error: module 'modA' not found
// ERROR-NEXT: Error while scanning dependencies for modB:
-// ERROR-NEXT: {{.*}}: fatal error: module 'modB' not found
+// ERROR-NEXT: module-import-[[RAND]].input:1:3: fatal error: module 'modB' not found
// ERROR-NEXT: Error while scanning dependencies for modC:
-// ERROR-NEXT: {{.*}}: fatal error: module 'modC' not found
+// ERROR-NEXT: module-import-[[RAND]].input:1:4: fatal error: module 'modC' not found
// CHECK: {
// CHECK-NEXT: "modules": [
// CHECK-NEXT: {
>From cfc65cd22076bd92b2c7ee219e5cc0381c5a86c9 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Tue, 3 Mar 2026 14:33:12 -0800
Subject: [PATCH 3/5] Remove unnecessary unique identifier from the buffer's
file name.
---
clang/lib/DependencyScanning/DependencyScanningWorker.cpp | 4 +---
.../modules-full-by-mult-mod-names-diagnostics.c | 6 +++---
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
index 3e89f543b75ce..56348ac3a6332 100644
--- a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
@@ -183,10 +183,8 @@ dependencies::initVFSForByNameScanning(
llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
- SmallString<128> FakeInputPath;
+ std::string FakeInputPath("module-include.input");
// TODO: We should retry the creation if the path already exists.
- llvm::sys::fs::createUniquePath("module-import-%%%%.input", FakeInputPath,
- /*MakeAbsolute=*/false);
InMemoryFS->addFile(FakeInputPath, 0,
llvm::MemoryBuffer::getMemBuffer(FakeInput));
IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
diff --git a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c
index fa93f5fc92fd7..f99690c3a7715 100644
--- a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c
+++ b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c
@@ -27,11 +27,11 @@ module root2 { header "root2.h" }
// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
// ERROR: Error while scanning dependencies for modA:
-// ERROR-NEXT: module-import-[[RAND:[a-zA-Z0-9]{4}]].input:1:1: fatal error: module 'modA' not found
+// ERROR-NEXT: module-include.input:1:1: fatal error: module 'modA' not found
// ERROR-NEXT: Error while scanning dependencies for modB:
-// ERROR-NEXT: module-import-[[RAND]].input:1:3: fatal error: module 'modB' not found
+// ERROR-NEXT: module-include.input:1:3: fatal error: module 'modB' not found
// ERROR-NEXT: Error while scanning dependencies for modC:
-// ERROR-NEXT: module-import-[[RAND]].input:1:4: fatal error: module 'modC' not found
+// ERROR-NEXT: module-include.input:1:4: fatal error: module 'modC' not found
// CHECK: {
// CHECK-NEXT: "modules": [
// CHECK-NEXT: {
>From f1865aa35e69f170fa3292229be54185610dae4d Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Tue, 3 Mar 2026 16:18:15 -0800
Subject: [PATCH 4/5] Address code review.
---
clang/lib/DependencyScanning/DependencyScanningWorker.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
index 56348ac3a6332..59f4ca0048085 100644
--- a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
@@ -183,8 +183,7 @@ dependencies::initVFSForByNameScanning(
llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
- std::string FakeInputPath("module-include.input");
- // TODO: We should retry the creation if the path already exists.
+ StringRef FakeInputPath("module-include.input");
InMemoryFS->addFile(FakeInputPath, 0,
llvm::MemoryBuffer::getMemBuffer(FakeInput));
IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
>From 6c12f9a349abdf79cb5e29f502ad8ba95221c964 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Wed, 4 Mar 2026 09:14:08 -0800
Subject: [PATCH 5/5] Address review comments.
---
.../DependencyScanning/DependencyScannerImpl.h | 18 +++++++++---------
.../DependencyScannerImpl.cpp | 10 +++-------
.../DependencyScanningWorker.cpp | 2 +-
3 files changed, 13 insertions(+), 17 deletions(-)
diff --git a/clang/include/clang/DependencyScanning/DependencyScannerImpl.h b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h
index 27bf1ce088ac8..05bef147e6f86 100644
--- a/clang/include/clang/DependencyScanning/DependencyScannerImpl.h
+++ b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h
@@ -155,15 +155,15 @@ class CompilerInstanceWithContext {
bool computeDependencies(StringRef ModuleName, DependencyConsumer &Consumer,
DependencyActionController &Controller);
- // FIXME: see if we can use a smaller fake buffer, or set the buffer's size
- // dynamically.
- // At the time of this commit, the estimated number of total unique importable
- // names is around 3000 from Apple's SDKs. We usually import them in parallel,
- // so it is unlikely that all names are all scanned by the same dependency
- // scanning worker. Therefore the 64k (20x bigger than our estimate) size is
- // sufficient to hold the unique source locations to report diagnostics per
- // worker.
- static const int32_t FakeInputBufferSize = 1 << 16;
+ // MaxNumOfQueries is the upper limit of the number of names the by-name
+ // scanning API (computeDependencies) can support after a
+ // CompilerInstanceWithContext is initialized. At the time of this commit, the
+ // estimated number of total unique importable names is around 3000 from
+ // Apple's SDKs. We usually import them in parallel, so it is unlikely that
+ // all names are all scanned by the same dependency scanning worker. Therefore
+ // the 64k (20x bigger than our estimate) size is sufficient to hold the
+ // unique source locations to report diagnostics per worker.
+ static const int32_t MaxNumOfQueries = 1 << 16;
};
} // namespace dependencies
} // namespace clang
diff --git a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp
index 7708477d7abd2..2cd4ac6c564b4 100644
--- a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp
+++ b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp
@@ -869,6 +869,9 @@ bool CompilerInstanceWithContext::initialize(
bool CompilerInstanceWithContext::computeDependencies(
StringRef ModuleName, DependencyConsumer &Consumer,
DependencyActionController &Controller) {
+ if (SrcLocOffset >= MaxNumOfQueries)
+ llvm::report_fatal_error("exceeded maximum by-name scans per worker");
+
assert(CIPtr && "CIPtr must be initialized before calling this method");
auto &CI = *CIPtr;
@@ -936,13 +939,6 @@ bool CompilerInstanceWithContext::computeDependencies(
// FIXME: Scan modules asynchronously here as well.
SrcLocOffset++;
- if (SrcLocOffset >= FakeInputBufferSize) {
- // This should never happen. We could have replaced this by an assert,
- // but reporting a fatal error is more explicit, and we know
- // immediately we are overflowing.
- llvm::report_fatal_error("exceeded maximum by-name scans per worker; "
- "increase module-import-by-name.input's size");
- }
SmallVector<IdentifierLoc, 2> Path;
IdentifierInfo *ModuleID = PP.getIdentifierInfo(ModuleName);
diff --git a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
index 59f4ca0048085..32662fcf078a1 100644
--- a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp
@@ -166,7 +166,7 @@ dependencies::initVFSForTUBufferScanning(
// unique source locations for the diagnostics. Therefore sharing
// this global buffer across threads is ok.
static const std::string
- FakeInput(" ", CompilerInstanceWithContext::FakeInputBufferSize);
+ FakeInput(" ", CompilerInstanceWithContext::MaxNumOfQueries);
std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>,
std::vector<std::string>>
More information about the cfe-commits
mailing list