[clang] Introduce copy-on-write `CompilerInvocation` (PR #65412)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 7 13:12:28 PDT 2023


https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/65412:

From 1cf820b6e89b8747ed77bf998e0c0784a23bf851 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 7 Sep 2023 12:54:51 -0700
Subject: [PATCH] [clang] Introduce copy-on-write `CompilerInvocation`

---
 clang/include/clang/Basic/CodeGenOptions.h    |   1 +
 clang/include/clang/Basic/DiagnosticOptions.h |   1 +
 clang/include/clang/Basic/LangOptions.h       |   1 +
 .../clang/Frontend/CompilerInvocation.h       | 204 +++++++++++++-----
 clang/lib/Frontend/CompilerInvocation.cpp     | 113 ++++++++--
 5 files changed, 247 insertions(+), 73 deletions(-)

diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 14fc94fe27f9958..08d4c103ce2453b 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -32,6 +32,7 @@ namespace clang {
 /// that this large collection of bitfields is a trivial class type.
 class CodeGenOptionsBase {
   friend class CompilerInvocation;
+  friend class CompilerInvocationBase;
 
 public:
 #define CODEGENOPT(Name, Bits, Default) unsigned Name : Bits;
diff --git a/clang/include/clang/Basic/DiagnosticOptions.h b/clang/include/clang/Basic/DiagnosticOptions.h
index 0f3120859ecef6c..099982c3bdd5a00 100644
--- a/clang/include/clang/Basic/DiagnosticOptions.h
+++ b/clang/include/clang/Basic/DiagnosticOptions.h
@@ -72,6 +72,7 @@ class DiagnosticOptions : public RefCountedBase<DiagnosticOptions>{
                                   clang::DiagnosticsEngine *, bool);
 
   friend class CompilerInvocation;
+  friend class CompilerInvocationBase;
 
 public:
   enum TextDiagnosticFormat { Clang, MSVC, Vi, SARIF };
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 2adf4751444726b..64fd76b8e2cd02f 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -34,6 +34,7 @@ namespace clang {
 /// this large collection of bitfields is a trivial class type.
 class LangOptionsBase {
   friend class CompilerInvocation;
+  friend class CompilerInvocationBase;
 
 public:
   // Define simple language options (with no accessors).
diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h
index 2dc73b85bd4afe4..b79a1a53c150171 100644
--- a/clang/include/clang/Frontend/CompilerInvocation.h
+++ b/clang/include/clang/Frontend/CompilerInvocation.h
@@ -107,14 +107,23 @@ class CompilerInvocationBase {
   /// Options controlling preprocessed output.
   std::shared_ptr<PreprocessorOutputOptions> PreprocessorOutputOpts;
 
-public:
+  /// Dummy tag type whose instance can be passed into the constructor to
+  /// prevent creation of the reference-counted option objects.
+  struct EmptyConstructor {};
+
   CompilerInvocationBase();
-  CompilerInvocationBase(const CompilerInvocationBase &X) { operator=(X); }
+  CompilerInvocationBase(EmptyConstructor) {}
+  CompilerInvocationBase(const CompilerInvocationBase &X) = delete;
   CompilerInvocationBase(CompilerInvocationBase &&X) = default;
-  CompilerInvocationBase &operator=(const CompilerInvocationBase &X);
+  CompilerInvocationBase &operator=(const CompilerInvocationBase &X) = delete;
+  CompilerInvocationBase &deep_copy_assign(const CompilerInvocationBase &X);
+  CompilerInvocationBase &shallow_copy_assign(const CompilerInvocationBase &X);
   CompilerInvocationBase &operator=(CompilerInvocationBase &&X) = default;
   ~CompilerInvocationBase() = default;
 
+public:
+  /// Const getters.
+  /// @{
   const LangOptions &getLangOpts() const { return *LangOpts; }
   const TargetOptions &getTargetOpts() const { return *TargetOpts; }
   const DiagnosticOptions &getDiagnosticOpts() const { return *DiagnosticOpts; }
@@ -131,7 +140,101 @@ class CompilerInvocationBase {
   const PreprocessorOutputOptions &getPreprocessorOutputOpts() const {
     return *PreprocessorOutputOpts;
   }
+  /// @}
+
+  /// Command line generation.
+  /// @{
+  using StringAllocator = llvm::function_ref<const char *(const Twine &)>;
+  /// Generate cc1-compatible command line arguments from this instance.
+  ///
+  /// \param [out] Args - The generated arguments. Note that the caller is
+  /// responsible for inserting the path to the clang executable and "-cc1" if
+  /// desired.
+  /// \param SA - A function that given a Twine can allocate storage for a given
+  /// command line argument and return a pointer to the newly allocated string.
+  /// The returned pointer is what gets appended to Args.
+  void generateCC1CommandLine(llvm::SmallVectorImpl<const char *> &Args,
+                              StringAllocator SA) const {
+    generateCC1CommandLine([&](const Twine &Arg) {
+      // No need to allocate static string literals.
+      Args.push_back(Arg.isSingleStringLiteral()
+                         ? Arg.getSingleStringRef().data()
+                         : SA(Arg));
+    });
+  }
+
+  using ArgumentConsumer = llvm::function_ref<void(const Twine &)>;
+  /// Generate cc1-compatible command line arguments from this instance.
+  ///
+  /// \param Consumer - Callback that gets invoked for every single generated
+  /// command line argument.
+  void generateCC1CommandLine(ArgumentConsumer Consumer) const;
+
+  /// Generate cc1-compatible command line arguments from this instance,
+  /// wrapping the result as a std::vector<std::string>.
+  ///
+  /// This is a (less-efficient) wrapper over generateCC1CommandLine().
+  std::vector<std::string> getCC1CommandLine() const;
+
+private:
+  /// Generate command line options from DiagnosticOptions.
+  static void GenerateDiagnosticArgs(const DiagnosticOptions &Opts,
+                                     ArgumentConsumer Consumer,
+                                     bool DefaultDiagColor);
+
+  /// Generate command line options from LangOptions.
+  static void GenerateLangArgs(const LangOptions &Opts,
+                               ArgumentConsumer Consumer, const llvm::Triple &T,
+                               InputKind IK);
+
+  // Generate command line options from CodeGenOptions.
+  static void GenerateCodeGenArgs(const CodeGenOptions &Opts,
+                                  ArgumentConsumer Consumer,
+                                  const llvm::Triple &T,
+                                  const std::string &OutputFile,
+                                  const LangOptions *LangOpts);
+  /// @}
+};
+
+/// Helper class for holding the data necessary to invoke the compiler.
+///
+/// This class is designed to represent an abstract "invocation" of the
+/// compiler, including data such as the include paths, the code generation
+/// options, the warning flags, and so on.
+class CompilerInvocation : public CompilerInvocationBase {
+public:
+  CompilerInvocation() = default;
+  CompilerInvocation(const CompilerInvocation &X)
+      : CompilerInvocationBase(EmptyConstructor{}) {
+    deep_copy_assign(X);
+  }
+  CompilerInvocation(CompilerInvocation &&) = default;
+  CompilerInvocation &operator=(const CompilerInvocation &X) {
+    deep_copy_assign(X);
+    return *this;
+  }
+  ~CompilerInvocation() = default;
 
+  /// Const getters.
+  /// @{
+  // Note: These need to be pulled in manually. Otherwise, they get hidden by
+  // the mutable getters with the same names.
+  using CompilerInvocationBase::getLangOpts;
+  using CompilerInvocationBase::getTargetOpts;
+  using CompilerInvocationBase::getDiagnosticOpts;
+  using CompilerInvocationBase::getHeaderSearchOpts;
+  using CompilerInvocationBase::getPreprocessorOpts;
+  using CompilerInvocationBase::getAnalyzerOpts;
+  using CompilerInvocationBase::getMigratorOpts;
+  using CompilerInvocationBase::getCodeGenOpts;
+  using CompilerInvocationBase::getFileSystemOpts;
+  using CompilerInvocationBase::getFrontendOpts;
+  using CompilerInvocationBase::getDependencyOutputOpts;
+  using CompilerInvocationBase::getPreprocessorOutputOpts;
+  /// @}
+
+  /// Mutable getters.
+  /// @{
   LangOptions &getLangOpts() { return *LangOpts; }
   TargetOptions &getTargetOpts() { return *TargetOpts; }
   DiagnosticOptions &getDiagnosticOpts() { return *DiagnosticOpts; }
@@ -148,15 +251,8 @@ class CompilerInvocationBase {
   PreprocessorOutputOptions &getPreprocessorOutputOpts() {
     return *PreprocessorOutputOpts;
   }
-};
+  /// @}
 
-/// Helper class for holding the data necessary to invoke the compiler.
-///
-/// This class is designed to represent an abstract "invocation" of the
-/// compiler, including data such as the include paths, the code generation
-/// options, the warning flags, and so on.
-class CompilerInvocation : public CompilerInvocationBase {
-public:
   /// Base class internals.
   /// @{
   using CompilerInvocationBase::LangOpts;
@@ -200,38 +296,6 @@ class CompilerInvocation : public CompilerInvocationBase {
   /// identifying the conditions under which the module was built.
   std::string getModuleHash() const;
 
-  using StringAllocator = llvm::function_ref<const char *(const Twine &)>;
-  /// Generate cc1-compatible command line arguments from this instance.
-  ///
-  /// \param [out] Args - The generated arguments. Note that the caller is
-  /// responsible for inserting the path to the clang executable and "-cc1" if
-  /// desired.
-  /// \param SA - A function that given a Twine can allocate storage for a given
-  /// command line argument and return a pointer to the newly allocated string.
-  /// The returned pointer is what gets appended to Args.
-  void generateCC1CommandLine(llvm::SmallVectorImpl<const char *> &Args,
-                              StringAllocator SA) const {
-    generateCC1CommandLine([&](const Twine &Arg) {
-      // No need to allocate static string literals.
-      Args.push_back(Arg.isSingleStringLiteral()
-                         ? Arg.getSingleStringRef().data()
-                         : SA(Arg));
-    });
-  }
-
-  using ArgumentConsumer = llvm::function_ref<void(const Twine &)>;
-  /// Generate cc1-compatible command line arguments from this instance.
-  ///
-  /// \param Consumer - Callback that gets invoked for every single generated
-  /// command line argument.
-  void generateCC1CommandLine(ArgumentConsumer Consumer) const;
-
-  /// Generate cc1-compatible command line arguments from this instance,
-  /// wrapping the result as a std::vector<std::string>.
-  ///
-  /// This is a (less-efficient) wrapper over generateCC1CommandLine().
-  std::vector<std::string> getCC1CommandLine() const;
-
   /// Check that \p Args can be parsed and re-serialized without change,
   /// emiting diagnostics for any differences.
   ///
@@ -256,35 +320,57 @@ class CompilerInvocation : public CompilerInvocationBase {
                                  ArrayRef<const char *> CommandLineArgs,
                                  DiagnosticsEngine &Diags, const char *Argv0);
 
-  /// Generate command line options from DiagnosticOptions.
-  static void GenerateDiagnosticArgs(const DiagnosticOptions &Opts,
-                                     ArgumentConsumer Consumer,
-                                     bool DefaultDiagColor);
-
   /// Parse command line options that map to LangOptions.
   static bool ParseLangArgs(LangOptions &Opts, llvm::opt::ArgList &Args,
                             InputKind IK, const llvm::Triple &T,
                             std::vector<std::string> &Includes,
                             DiagnosticsEngine &Diags);
 
-  /// Generate command line options from LangOptions.
-  static void GenerateLangArgs(const LangOptions &Opts,
-                               ArgumentConsumer Consumer, const llvm::Triple &T,
-                               InputKind IK);
-
   /// Parse command line options that map to CodeGenOptions.
   static bool ParseCodeGenArgs(CodeGenOptions &Opts, llvm::opt::ArgList &Args,
                                InputKind IK, DiagnosticsEngine &Diags,
                                const llvm::Triple &T,
                                const std::string &OutputFile,
                                const LangOptions &LangOptsRef);
+};
 
-  // Generate command line options from CodeGenOptions.
-  static void GenerateCodeGenArgs(const CodeGenOptions &Opts,
-                                  ArgumentConsumer Consumer,
-                                  const llvm::Triple &T,
-                                  const std::string &OutputFile,
-                                  const LangOptions *LangOpts);
+/// Same as \c CompilerInvocation, but with copy-on-write optimization.
+class CowCompilerInvocation : public CompilerInvocationBase {
+public:
+  CowCompilerInvocation() = default;
+  CowCompilerInvocation(const CowCompilerInvocation &X)
+      : CompilerInvocationBase(EmptyConstructor{}) {
+    shallow_copy_assign(X);
+  }
+  CowCompilerInvocation(CowCompilerInvocation &&) = default;
+  CowCompilerInvocation &operator=(const CowCompilerInvocation &X) {
+    shallow_copy_assign(X);
+    return *this;
+  }
+  ~CowCompilerInvocation() = default;
+
+  CowCompilerInvocation(const CompilerInvocation &X)
+      : CompilerInvocationBase(EmptyConstructor{}) {
+    deep_copy_assign(X);
+  }
+
+  // Const getters are inherited from the base class.
+
+  /// Mutable getters.
+  /// @{
+  LangOptions &getMutLangOpts();
+  TargetOptions &getMutTargetOpts();
+  DiagnosticOptions &getMutDiagnosticOpts();
+  HeaderSearchOptions &getMutHeaderSearchOpts();
+  PreprocessorOptions &getMutPreprocessorOpts();
+  AnalyzerOptions &getMutAnalyzerOpts();
+  MigratorOptions &getMutMigratorOpts();
+  CodeGenOptions &getMutCodeGenOpts();
+  FileSystemOptions &getMutFileSystemOpts();
+  FrontendOptions &getMutFrontendOpts();
+  DependencyOutputOptions &getMutDependencyOutputOpts();
+  PreprocessorOutputOptions &getMutPreprocessorOutputOpts();
+  /// @}
 };
 
 IntrusiveRefCntPtr<llvm::vfs::FileSystem>
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index c0dab4e64ff1926..51644e8532c31eb 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -152,7 +152,7 @@ CompilerInvocationBase::CompilerInvocationBase()
       PreprocessorOutputOpts(std::make_shared<PreprocessorOutputOptions>()) {}
 
 CompilerInvocationBase &
-CompilerInvocationBase::operator=(const CompilerInvocationBase &X) {
+CompilerInvocationBase::deep_copy_assign(const CompilerInvocationBase &X) {
   if (this != &X) {
     LangOpts = make_shared_copy(X.getLangOpts());
     TargetOpts = make_shared_copy(X.getTargetOpts());
@@ -170,6 +170,90 @@ CompilerInvocationBase::operator=(const CompilerInvocationBase &X) {
   return *this;
 }
 
+CompilerInvocationBase &
+CompilerInvocationBase::shallow_copy_assign(const CompilerInvocationBase &X) {
+  if (this != &X) {
+    LangOpts = X.LangOpts;
+    TargetOpts = X.TargetOpts;
+    DiagnosticOpts = X.DiagnosticOpts;
+    HSOpts = X.HSOpts;
+    PPOpts = X.PPOpts;
+    AnalyzerOpts = X.AnalyzerOpts;
+    MigratorOpts = X.MigratorOpts;
+    CodeGenOpts = X.CodeGenOpts;
+    FSOpts = X.FSOpts;
+    FrontendOpts = X.FrontendOpts;
+    DependencyOutputOpts = X.DependencyOutputOpts;
+    PreprocessorOutputOpts = X.PreprocessorOutputOpts;
+  }
+  return *this;
+}
+
+namespace {
+template <typename T>
+T &ensureOwned(std::shared_ptr<T> &Storage) {
+  if (Storage.use_count() > 1)
+    Storage = std::make_shared<T>(*Storage);
+  return *Storage;
+}
+
+template <typename T>
+T &ensureOwned(llvm::IntrusiveRefCntPtr<T> &Storage) {
+  if (Storage.useCount() > 1)
+    Storage = llvm::makeIntrusiveRefCnt<T>(*Storage);
+  return *Storage;
+}
+} // namespace
+
+LangOptions &CowCompilerInvocation::getMutLangOpts() {
+  return ensureOwned(LangOpts);
+}
+
+TargetOptions &CowCompilerInvocation::getMutTargetOpts() {
+  return ensureOwned(TargetOpts);
+}
+
+DiagnosticOptions &CowCompilerInvocation::getMutDiagnosticOpts() {
+  return ensureOwned(DiagnosticOpts);
+}
+
+HeaderSearchOptions &CowCompilerInvocation::getMutHeaderSearchOpts() {
+  return ensureOwned(HSOpts);
+}
+
+PreprocessorOptions &CowCompilerInvocation::getMutPreprocessorOpts() {
+  return ensureOwned(PPOpts);
+}
+
+AnalyzerOptions &CowCompilerInvocation::getMutAnalyzerOpts() {
+  return ensureOwned(AnalyzerOpts);
+}
+
+MigratorOptions &CowCompilerInvocation::getMutMigratorOpts() {
+  return ensureOwned(MigratorOpts);
+}
+
+CodeGenOptions &CowCompilerInvocation::getMutCodeGenOpts() {
+  return ensureOwned(CodeGenOpts);
+}
+
+FileSystemOptions &CowCompilerInvocation::getMutFileSystemOpts() {
+  return ensureOwned(FSOpts);
+}
+
+FrontendOptions &CowCompilerInvocation::getMutFrontendOpts() {
+  return ensureOwned(FrontendOpts);
+}
+
+DependencyOutputOptions &CowCompilerInvocation::getMutDependencyOutputOpts() {
+  return ensureOwned(DependencyOutputOpts);
+}
+
+PreprocessorOutputOptions &
+CowCompilerInvocation::getMutPreprocessorOutputOpts() {
+  return ensureOwned(PreprocessorOutputOpts);
+}
+
 //===----------------------------------------------------------------------===//
 // Normalizers
 //===----------------------------------------------------------------------===//
@@ -1355,11 +1439,11 @@ static void setPGOUseInstrumentor(CodeGenOptions &Opts,
     Opts.setProfileUse(CodeGenOptions::ProfileClangInstr);
 }
 
-void CompilerInvocation::GenerateCodeGenArgs(const CodeGenOptions &Opts,
-                                             ArgumentConsumer Consumer,
-                                             const llvm::Triple &T,
-                                             const std::string &OutputFile,
-                                             const LangOptions *LangOpts) {
+void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts,
+                                                 ArgumentConsumer Consumer,
+                                                 const llvm::Triple &T,
+                                                 const std::string &OutputFile,
+                                                 const LangOptions *LangOpts) {
   const CodeGenOptions &CodeGenOpts = Opts;
 
   if (Opts.OptimizationLevel == 0)
@@ -2263,9 +2347,9 @@ static bool ParseMigratorArgs(MigratorOptions &Opts, const ArgList &Args,
   return Diags.getNumErrors() == NumErrorsBefore;
 }
 
-void CompilerInvocation::GenerateDiagnosticArgs(const DiagnosticOptions &Opts,
-                                                ArgumentConsumer Consumer,
-                                                bool DefaultDiagColor) {
+void CompilerInvocationBase::GenerateDiagnosticArgs(
+    const DiagnosticOptions &Opts, ArgumentConsumer Consumer,
+    bool DefaultDiagColor) {
   const DiagnosticOptions *DiagnosticOpts = &Opts;
 #define DIAG_OPTION_WITH_MARSHALLING(...)                                      \
   GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__)
@@ -3256,9 +3340,10 @@ static StringRef GetInputKindName(InputKind IK) {
   llvm_unreachable("unknown input language");
 }
 
-void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
-                                          ArgumentConsumer Consumer,
-                                          const llvm::Triple &T, InputKind IK) {
+void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts,
+                                              ArgumentConsumer Consumer,
+                                              const llvm::Triple &T,
+                                              InputKind IK) {
   if (IK.getFormat() == InputKind::Precompiled ||
       IK.getLanguage() == Language::LLVM_IR) {
     if (Opts.ObjCAutoRefCount)
@@ -4597,7 +4682,7 @@ std::string CompilerInvocation::getModuleHash() const {
   return toString(llvm::APInt(64, Hash), 36, /*Signed=*/false);
 }
 
-void CompilerInvocation::generateCC1CommandLine(
+void CompilerInvocationBase::generateCC1CommandLine(
     ArgumentConsumer Consumer) const {
   llvm::Triple T(getTargetOpts().Triple);
 
@@ -4619,7 +4704,7 @@ void CompilerInvocation::generateCC1CommandLine(
   GenerateDependencyOutputArgs(getDependencyOutputOpts(), Consumer);
 }
 
-std::vector<std::string> CompilerInvocation::getCC1CommandLine() const {
+std::vector<std::string> CompilerInvocationBase::getCC1CommandLine() const {
   std::vector<std::string> Args{"-cc1"};
   generateCC1CommandLine(
       [&Args](const Twine &Arg) { Args.push_back(Arg.str()); });



More information about the cfe-commits mailing list