[clang] [clang][deps] Avoid `CompilerInvocation` copies (PR #205632)
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 26 09:00:24 PDT 2026
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/205632
>From bc6447f27fdf093191b8fa1f9db57d62abbb01b7 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 24 Jun 2026 11:03:05 -0700
Subject: [PATCH 1/2] [clang][deps] Avoid `CompilerInvocation` copies
When constructing the dependency graph for compilation caching, the dependency scanner needs to perform some extra operations on the compiler invocations. Historically, these have not made good use of the copy-on-write variant. This patch takes care to minimize `CompilerInvocation` copies, which improves incremental scans with a populated, up-to-date scanning module cache by 16-18%. Together with https://github.com/llvm/llvm-project/pull/203350, which operates in the same space, wall-times are improved by 1.54x and instruction counts by 1.66x.
---
.../DependencyActionController.h | 2 +-
.../clang/Frontend/CompilerInvocation.h | 80 ++++++++++++++++++-
.../DependencyScannerImpl.cpp | 12 ++-
clang/lib/Frontend/CompilerInvocation.cpp | 23 ++++++
clang/lib/Tooling/DependencyScanningTool.cpp | 6 +-
5 files changed, 118 insertions(+), 5 deletions(-)
diff --git a/clang/include/clang/DependencyScanning/DependencyActionController.h b/clang/include/clang/DependencyScanning/DependencyActionController.h
index 024b0de9048ec..023f080b767cc 100644
--- a/clang/include/clang/DependencyScanning/DependencyActionController.h
+++ b/clang/include/clang/DependencyScanning/DependencyActionController.h
@@ -61,7 +61,7 @@ class DependencyActionController {
/// Finalizes the scan instance and modifies the resulting TU invocation.
/// Returns true on success, false on failure.
virtual bool finalize(CompilerInstance &ScanInstance,
- CompilerInvocation &NewInvocation) {
+ CowCompilerInvocation &NewInvocation) {
return true;
}
diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h
index 03097aefacf50..a3bd41a70a4ec 100644
--- a/clang/include/clang/Frontend/CompilerInvocation.h
+++ b/clang/include/clang/Frontend/CompilerInvocation.h
@@ -21,8 +21,10 @@
#include "clang/Frontend/MigratorOptions.h"
#include "clang/Frontend/PreprocessorOutputOptions.h"
#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/ScopeExit.h"
+
#include <memory>
#include <string>
@@ -127,6 +129,9 @@ class CompilerInvocationBase {
/// prevent creation of the reference-counted option objects.
struct EmptyConstructor {};
+ /// Tag for the shallow-copy constructor below.
+ struct ShallowConstructor {};
+
CompilerInvocationBase();
CompilerInvocationBase(EmptyConstructor) {}
CompilerInvocationBase(const CompilerInvocationBase &X) = delete;
@@ -251,6 +256,15 @@ class CompilerInvocation : public CompilerInvocationBase {
explicit CompilerInvocation(const CowCompilerInvocation &X);
CompilerInvocation &operator=(const CowCompilerInvocation &X);
+ /// Move-construct/move-assign from a \c CowCompilerInvocation. Steals the
+ /// (potentially copy-on-written) option group pointers without deep-copying;
+ /// \p X is left empty. Useful to receive results of mutating a temporary
+ /// Cow alias back into a \c CompilerInvocation.
+ /// @{
+ explicit CompilerInvocation(CowCompilerInvocation &&X);
+ CompilerInvocation &operator=(CowCompilerInvocation &&X);
+ /// @}
+
/// Const getters.
/// @{
// Note: These need to be pulled in manually. Otherwise, they get hidden by
@@ -293,6 +307,22 @@ class CompilerInvocation : public CompilerInvocationBase {
ssaf::SSAFOptions &getSSAFOpts() { return *SSAFOpts; }
/// @}
+  /// Invokes \a Fn with a \c CowCompilerInvocation representing \c this.
+  /// \a Fn must not access \c this directly: the non-const overload leaves
+  /// \c this moved-from for the duration of the call, and the const overload
+  /// requires that \c this is not mutated while the alias is alive.
+  /// The provided \c CowCompilerInvocation must not escape \a Fn.
+  template <class R>
+  R withCowRef(llvm::function_ref<R(CowCompilerInvocation &)> Fn);
+  template <class R>
+  R withCowRef(llvm::function_ref<R(const CowCompilerInvocation &)> Fn) const;
+
+ /// Visitation.
+ /// @{
+ /// Visits paths stored in the invocation. The callback may return true to
+ /// short-circuit the visitation, or return false to continue visiting. This
+ /// is allowed to mutate the visited paths.
+ void visitPaths(llvm::function_ref<bool(std::string &)> Callback);
+ /// @}
+
/// Create a compiler invocation from a list of input options.
/// \returns true on success.
///
@@ -385,6 +415,16 @@ class CowCompilerInvocation : public CompilerInvocationBase {
CowCompilerInvocation(CompilerInvocation &&X)
: CompilerInvocationBase(std::move(X)) {}
+ /// Construct a CowCompilerInvocation that aliases the option storage of \p
+ /// X without deep-copying. Subsequent mutations through getMut*Opts() will
+ /// copy-on-write per group as usual, leaving \p X unaffected. The caller
+ /// must guarantee that \p X is not mutated for the lifetime of the
+ /// constructed invocation.
+ CowCompilerInvocation(ShallowConstructor, const CompilerInvocation &X)
+ : CompilerInvocationBase(EmptyConstructor{}) {
+ shallow_copy_assign(X);
+ }
+
// Const getters are inherited from the base class.
/// Mutable getters.
@@ -404,8 +444,46 @@ class CowCompilerInvocation : public CompilerInvocationBase {
PreprocessorOutputOptions &getMutPreprocessorOutputOpts();
ssaf::SSAFOptions &getMutSSAFOpts();
/// @}
+
+ /// Visits paths stored in the invocation, allowing the callback to mutate
+ /// them. To preserve the copy-on-write invariant for groups whose paths the
+ /// caller might modify, this ensures unique ownership of every option group
+ /// up front; if the callback only inspects (and does not mutate) the paths,
+ /// the const \c visitPaths overload should be used instead to avoid those
+ /// per-group copies.
+ void visitMutPaths(llvm::function_ref<bool(std::string &)> Callback);
};
+template <class R>
+R CompilerInvocation::withCowRef(
+    llvm::function_ref<R(CowCompilerInvocation &)> Fn) {
+  // Moving (not sharing) the option groups leaves the shared_ptr ref-counts
+  // untouched, so mutations made by \a Fn do not trigger copy-on-write copies.
+  // \c *this is moved-from while \a Fn runs; on scope exit (i.e. after the
+  // result of \a Fn has been computed) the groups are moved back into it.
+  CowCompilerInvocation CowRef = std::move(*this);
+  llvm::scope_exit Mutate([&]() { *this = std::move(CowRef); });
+  return Fn(CowRef);
+}
+
+template <class R>
+R CompilerInvocation::withCowRef(
+    llvm::function_ref<R(const CowCompilerInvocation &)> Fn) const {
+  // We use the shallow constructor. Since \a Fn cannot modify \c CowRef, no
+  // copies will be created, despite the bump to the ref-count of the shared_ptr
+  // that holds individual options.
+  // Note: when a lambda is passed on a non-const invocation, overload
+  // resolution prefers the non-const overload; call through a const reference
+  // to select this one.
+  CowCompilerInvocation CowRef(ShallowConstructor{}, *this);
+  return Fn(CowRef);
+}
+
+inline CompilerInvocation::CompilerInvocation(CowCompilerInvocation &&X)
+ : CompilerInvocationBase(std::move(X)) {}
+
+inline CompilerInvocation &
+CompilerInvocation::operator=(CowCompilerInvocation &&X) {
+ CompilerInvocationBase::operator=(std::move(X));
+ return *this;
+}
+
IntrusiveRefCntPtr<llvm::vfs::FileSystem>
createVFSFromCompilerInvocation(const CompilerInvocation &CI,
DiagnosticsEngine &Diags);
diff --git a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp
index dc3dbe3603c01..68fda9227dfcb 100644
--- a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp
+++ b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp
@@ -713,7 +713,11 @@ bool DependencyScanningAction::runInvocation(
if (MDC)
MDC->applyDiscoveredDependencies(*OriginalInvocation);
- if (!Controller.finalize(ScanInstance, *OriginalInvocation))
+ bool Success = OriginalInvocation->withCowRef<bool>(
+ [&](CowCompilerInvocation &CowOriginalInvocation) {
+ return Controller.finalize(ScanInstance, CowOriginalInvocation);
+ });
+ if (!Success)
return false;
Consumer.handleBuildCommand(
@@ -791,7 +795,11 @@ bool DependencyScanningAction::runInvocation(
MDC->applyDiscoveredDependencies(*OriginalInvocation);
}
- if (!Controller.finalize(ScanInstance, *OriginalInvocation))
+ bool Success = OriginalInvocation->withCowRef<bool>(
+ [&](CowCompilerInvocation &CowOriginalInvocation) {
+ return Controller.finalize(ScanInstance, CowOriginalInvocation);
+ });
+ if (!Success)
return false;
Consumer.handleBuildCommand(
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index dfde7b756dbff..e2260eb0d078a 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -5435,6 +5435,29 @@ void CompilerInvocationBase::visitPaths(
[&Callback](std::string &Path) { return Callback(StringRef(Path)); });
}
+void CowCompilerInvocation::visitMutPaths(
+ llvm::function_ref<bool(std::string &)> Callback) {
+ // Ensure exclusive ownership of every option group, so that visitPathsImpl()
+ // doesn't affect any other invocations.
+ // FIXME: Do this only if \c Callback does decide to modify any strings in an
+ // option group.
+ (void)ensureOwned(LangOpts);
+ (void)ensureOwned(TargetOpts);
+ (void)ensureOwned(DiagnosticOpts);
+ (void)ensureOwned(HSOpts);
+ (void)ensureOwned(PPOpts);
+ (void)ensureOwned(AnalyzerOpts);
+ (void)ensureOwned(MigratorOpts);
+ (void)ensureOwned(APINotesOpts);
+ (void)ensureOwned(CodeGenOpts);
+ (void)ensureOwned(FSOpts);
+ (void)ensureOwned(FrontendOpts);
+ (void)ensureOwned(DependencyOutputOpts);
+ (void)ensureOwned(PreprocessorOutputOpts);
+ (void)ensureOwned(SSAFOpts);
+ visitPathsImpl(Callback);
+}
+
void CompilerInvocationBase::generateCC1CommandLine(
ArgumentConsumer Consumer) const {
llvm::Triple T(getTargetOpts().Triple);
diff --git a/clang/lib/Tooling/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanningTool.cpp
index d55367107862d..11b225830c2fc 100644
--- a/clang/lib/Tooling/DependencyScanningTool.cpp
+++ b/clang/lib/Tooling/DependencyScanningTool.cpp
@@ -587,7 +587,11 @@ bool CompilerInstanceWithContext::computeDependencies(
MDC->run(Consumer);
MDC->applyDiscoveredDependencies(ModuleInvocation);
- if (!Controller.finalize(CI, ModuleInvocation))
+ bool Success = ModuleInvocation.withCowRef<bool>(
+ [&](CowCompilerInvocation &CowModuleInvocation) {
+ return Controller.finalize(CI, CowModuleInvocation);
+ });
+ if (!Success)
return false;
Consumer.handleBuildCommand(
>From ce82c7d9780e917f7b2b30a56cca0921ffbab4e4 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Fri, 26 Jun 2026 09:00:09 -0700
Subject: [PATCH 2/2] Unit tests
---
.../Frontend/CompilerInvocationTest.cpp | 67 +++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/clang/unittests/Frontend/CompilerInvocationTest.cpp b/clang/unittests/Frontend/CompilerInvocationTest.cpp
index 887fbc5938333..2c0c6972cef3f 100644
--- a/clang/unittests/Frontend/CompilerInvocationTest.cpp
+++ b/clang/unittests/Frontend/CompilerInvocationTest.cpp
@@ -165,6 +165,73 @@ TEST(CompilerInvocationTest, CopyOnWriteAssignment) {
EXPECT_EQ(B.getFrontendOpts().OutputFile, "x.o");
}
+TEST(CompilerInvocationTest, WithConstCowRef) {
+  CompilerInvocation CI;
+  CI.getHeaderSearchOpts().ModuleCachePath = "mcp";
+  HeaderSearchOptions *HSOpts = &CI.getHeaderSearchOpts();
+  // On non-const CI the lambda would bind to the mutable overload instead.
+  const CompilerInvocation &ConstCI = CI;
+  ConstCI.withCowRef<void>([](const CowCompilerInvocation &CowCI) {
+    // Values stored in the original invocation are reflected in cow.
+    EXPECT_EQ(CowCI.getHeaderSearchOpts().ModuleCachePath, "mcp");
+  });
+
+  // Creating const cow reference does not make a copy.
+  EXPECT_EQ(HSOpts, &CI.getHeaderSearchOpts());
+}
+
+TEST(CompilerInvocationTest, WithMutCowRef) {
+ CompilerInvocation CI;
+ CI.getHeaderSearchOpts().ModuleCachePath = "mcp";
+ CI.getLangOpts().Modules = true;
+
+ HeaderSearchOptions *HSOpts = &CI.getHeaderSearchOpts();
+ LangOptions *LangOpts = &CI.getLangOpts();
+
+ CI.withCowRef<void>([](CowCompilerInvocation &CowCI) {
+ // Values stored in the original invocation are reflected in cow.
+ EXPECT_EQ(CowCI.getHeaderSearchOpts().ModuleCachePath, "mcp");
+ // Values can be mutated.
+ CowCI.getMutLangOpts().Modules = false;
+ });
+
+ // Reading options class on a non-const cow reference does not make a copy.
+ EXPECT_EQ(HSOpts, &CI.getHeaderSearchOpts());
+ // Writing options class on a non-const cow reference does not make a copy.
+ EXPECT_EQ(LangOpts, &CI.getLangOpts());
+ // Writing options class on a non-const cow reference modifies the original.
+ EXPECT_EQ(CI.getLangOpts().Modules, false);
+}
+
+TEST(CompilerInvocationTest, CopyOnWriteVisitPaths) {
+ CowCompilerInvocation A;
+ A.getMutHeaderSearchOpts().ModuleCachePath = "mcp";
+ A.getMutLangOpts().Modules = true;
+
+ CowCompilerInvocation B(A);
+
+ const HeaderSearchOptions *HSOpts = &B.getHeaderSearchOpts();
+ const LangOptions *LangOpts = &B.getLangOpts();
+ B.visitMutPaths([](std::string &Path) {
+ if (Path == "mcp") {
+ Path = "pcm";
+ return true;
+ }
+ return false;
+ });
+
+ // Modifying a path copies and modifies only one instance of the invocation.
+ EXPECT_NE(HSOpts, &B.getHeaderSearchOpts());
+ EXPECT_EQ(B.getHeaderSearchOpts().ModuleCachePath, "pcm");
+ // And the other instance remains unmodified.
+ EXPECT_EQ(HSOpts, &A.getHeaderSearchOpts());
+ EXPECT_EQ(A.getHeaderSearchOpts().ModuleCachePath, "mcp");
+
+ // FIXME: Make this work: Unmodified options are not copied.
+ // EXPECT_EQ(LangOpts, &B.getLangOpts());
+ (void)LangOpts;
+}
+
// Boolean option with a keypath that defaults to true.
// The only flag with a negative spelling can set the keypath to false.
More information about the cfe-commits
mailing list