[clang] [clang][CodeGen] Shift relink option implementation away from module cloning (PR #81693)

Jacob Lambert via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 13 17:36:02 PST 2024


https://github.com/lamb-j updated https://github.com/llvm/llvm-project/pull/81693

>From aff288af78b94dbd7ef317ce368f25a305798adc Mon Sep 17 00:00:00 2001
From: Jacob Lambert <jacob.lambert at amd.com>
Date: Tue, 13 Feb 2024 17:30:21 -0800
Subject: [PATCH 1/2] [clang][CodeGen] Shift relink option implementation away
 from module cloning

We recently implemented a new option,
"-mllvm -relink-builtin-bitcode-postop", which allows relinking of
bitcode modules.

This implementation relied on llvm::CloneModule() in order to pass
copies of the modules to the linker and preserve the original modules
for later relinking.

However, cloning modules has been found to be prohibitively
expensive, significantly increasing compilation time for large
bitcode libraries.

In this patch, we shift the relink option implementation to instead
link the original modules initially, and reload modules from the file
system if relinking is requested. This approach results in
significantly reduced overhead.
---
 clang/include/clang/Basic/FileManager.h |   4 +-
 clang/lib/Basic/FileManager.cpp         |   2 +-
 clang/lib/CodeGen/BackendConsumer.h     |  16 ++-
 clang/lib/CodeGen/CodeGenAction.cpp     | 163 ++++++++++++------------
 clang/lib/CodeGen/LinkInModulesPass.cpp |  14 +-
 5 files changed, 110 insertions(+), 89 deletions(-)

diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index 997c17a0ffcfcc..2245fd78bfc9f0 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -283,7 +283,7 @@ class FileManager : public RefCountedBase<FileManager> {
                    bool RequiresNullTerminator = true);
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
   getBufferForFile(StringRef Filename, bool isVolatile = false,
-                   bool RequiresNullTerminator = true) {
+                   bool RequiresNullTerminator = true) const {
     return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile,
                                 RequiresNullTerminator);
   }
@@ -291,7 +291,7 @@ class FileManager : public RefCountedBase<FileManager> {
 private:
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
   getBufferForFileImpl(StringRef Filename, int64_t FileSize, bool isVolatile,
-                       bool RequiresNullTerminator);
+                       bool RequiresNullTerminator) const;
 
 public:
   /// Get the 'stat' information for the given \p Path.
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index 6097a27e429d66..cd520a6375e07e 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -547,7 +547,7 @@ FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile,
 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
 FileManager::getBufferForFileImpl(StringRef Filename, int64_t FileSize,
                                   bool isVolatile,
-                                  bool RequiresNullTerminator) {
+                                  bool RequiresNullTerminator) const {
   if (FileSystemOpts.WorkingDir.empty())
     return FS->getBufferForFile(Filename, FileSize, RequiresNullTerminator,
                                 isVolatile);
diff --git a/clang/lib/CodeGen/BackendConsumer.h b/clang/lib/CodeGen/BackendConsumer.h
index 72a814cd43d738..fd0f1984d6c0f7 100644
--- a/clang/lib/CodeGen/BackendConsumer.h
+++ b/clang/lib/CodeGen/BackendConsumer.h
@@ -34,6 +34,7 @@ class BackendConsumer : public ASTConsumer {
   const CodeGenOptions &CodeGenOpts;
   const TargetOptions &TargetOpts;
   const LangOptions &LangOpts;
+  const FileManager &FileMgr;
   std::unique_ptr<raw_pwrite_stream> AsmOutStream;
   ASTContext *Context;
   IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;
@@ -74,8 +75,8 @@ class BackendConsumer : public ASTConsumer {
                   const HeaderSearchOptions &HeaderSearchOpts,
                   const PreprocessorOptions &PPOpts,
                   const CodeGenOptions &CodeGenOpts,
-                  const TargetOptions &TargetOpts,
-                  const LangOptions &LangOpts, const std::string &InFile,
+                  const TargetOptions &TargetOpts, const LangOptions &LangOpts,
+                  const FileManager &FileMgr, const std::string &InFile,
                   SmallVector<LinkModule, 4> LinkModules,
                   std::unique_ptr<raw_pwrite_stream> OS, llvm::LLVMContext &C,
                   CoverageSourceInfo *CoverageInfo = nullptr);
@@ -88,8 +89,8 @@ class BackendConsumer : public ASTConsumer {
                   const HeaderSearchOptions &HeaderSearchOpts,
                   const PreprocessorOptions &PPOpts,
                   const CodeGenOptions &CodeGenOpts,
-                  const TargetOptions &TargetOpts,
-                  const LangOptions &LangOpts, llvm::Module *Module,
+                  const TargetOptions &TargetOpts, const LangOptions &LangOpts,
+                  const FileManager &FileMgr, llvm::Module *Module,
                   SmallVector<LinkModule, 4> LinkModules, llvm::LLVMContext &C,
                   CoverageSourceInfo *CoverageInfo = nullptr);
 
@@ -111,10 +112,13 @@ class BackendConsumer : public ASTConsumer {
   void AssignInheritanceModel(CXXRecordDecl *RD) override;
   void HandleVTable(CXXRecordDecl *RD) override;
 
-
- // Links each entry in LinkModules into our module.  Returns true on error.
+  // Links each entry in LinkModules into our module.  Returns true on error.
   bool LinkInModules(llvm::Module *M, bool ShouldLinkFiles = true);
 
+  // Load a bitcode module from -mlink-builtin-bitcode option using
+  // methods from a BackendConsumer instead of CompilerInstance
+  bool ReloadModules(llvm::Module *M);
+
   /// Get the best possible source location to represent a diagnostic that
   /// may have associated debug info.
   const FullSourceLoc getBestLocationFromDebugLoc(
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index f8038497d90a7b..88b6ce84b51105 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -109,56 +109,52 @@ static void reportOptRecordError(Error E, DiagnosticsEngine &Diags,
       });
 }
 
-BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
-                                 IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
-                                 const HeaderSearchOptions &HeaderSearchOpts,
-                                 const PreprocessorOptions &PPOpts,
-                                 const CodeGenOptions &CodeGenOpts,
-                                 const TargetOptions &TargetOpts,
-                                 const LangOptions &LangOpts,
-                                 const std::string &InFile,
-                                 SmallVector<LinkModule, 4> LinkModules,
-                                 std::unique_ptr<raw_pwrite_stream> OS,
-                                 LLVMContext &C,
-                                 CoverageSourceInfo *CoverageInfo)
-  : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
-  CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
-  AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS),
-  LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
-  LLVMIRGenerationRefCount(0),
-  Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts,
-                        PPOpts, CodeGenOpts, C, CoverageInfo)),
-  LinkModules(std::move(LinkModules)) {
-    TimerIsEnabled = CodeGenOpts.TimePasses;
-    llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
-    llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
+BackendConsumer::BackendConsumer(
+    BackendAction Action, DiagnosticsEngine &Diags,
+    IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+    const HeaderSearchOptions &HeaderSearchOpts,
+    const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
+    const TargetOptions &TargetOpts, const LangOptions &LangOpts,
+    const FileManager &FileMgr, const std::string &InFile,
+    SmallVector<LinkModule, 4> LinkModules,
+    std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
+    CoverageSourceInfo *CoverageInfo)
+    : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
+      CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
+      FileMgr(FileMgr), AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS),
+      LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
+      LLVMIRGenerationRefCount(0),
+      Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts,
+                            PPOpts, CodeGenOpts, C, CoverageInfo)),
+      LinkModules(std::move(LinkModules)) {
+  TimerIsEnabled = CodeGenOpts.TimePasses;
+  llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
+  llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
 }
 
 // This constructor is used in installing an empty BackendConsumer
 // to use the clang diagnostic handler for IR input files. It avoids
 // initializing the OS field.
-BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
-                                 IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
-                                 const HeaderSearchOptions &HeaderSearchOpts,
-                                 const PreprocessorOptions &PPOpts,
-                                 const CodeGenOptions &CodeGenOpts,
-                                 const TargetOptions &TargetOpts,
-                                 const LangOptions &LangOpts,
-                                 llvm::Module *Module,
-                                 SmallVector<LinkModule, 4> LinkModules,
-                                 LLVMContext &C,
-                                 CoverageSourceInfo *CoverageInfo)
-  : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
-  CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
-  Context(nullptr), FS(VFS),
-  LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
-  LLVMIRGenerationRefCount(0),
-  Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts,
-                        PPOpts, CodeGenOpts, C, CoverageInfo)),
-  LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
-    TimerIsEnabled = CodeGenOpts.TimePasses;
-    llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
-    llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
+BackendConsumer::BackendConsumer(
+    BackendAction Action, DiagnosticsEngine &Diags,
+    IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+    const HeaderSearchOptions &HeaderSearchOpts,
+    const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
+    const TargetOptions &TargetOpts, const LangOptions &LangOpts,
+    const FileManager &FileMgr, llvm::Module *Module,
+    SmallVector<LinkModule, 4> LinkModules, LLVMContext &C,
+    CoverageSourceInfo *CoverageInfo)
+    : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
+      CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
+      FileMgr(FileMgr), Context(nullptr), FS(VFS),
+      LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
+      LLVMIRGenerationRefCount(0),
+      Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, PPOpts,
+                            CodeGenOpts, C, CoverageInfo)),
+      LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
+  TimerIsEnabled = CodeGenOpts.TimePasses;
+  llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
+  llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
 }
 
 llvm::Module* BackendConsumer::getModule() const {
@@ -233,9 +229,37 @@ void BackendConsumer::HandleInterestingDecl(DeclGroupRef D) {
     HandleTopLevelDecl(D);
 }
 
+bool BackendConsumer::ReloadModules(llvm::Module *M) {
+  for (const CodeGenOptions::BitcodeFileToLink &F :
+       CodeGenOpts.LinkBitcodeFiles) {
+    auto BCBuf = FileMgr.getBufferForFile(F.Filename);
+    if (!BCBuf) {
+      Diags.Report(diag::err_cannot_open_file)
+          << F.Filename << BCBuf.getError().message();
+      LinkModules.clear();
+      return true;
+    }
+
+    LLVMContext &Ctx = getModule()->getContext();
+    Expected<std::unique_ptr<llvm::Module>> ModuleOrErr =
+        getOwningLazyBitcodeModule(std::move(*BCBuf), Ctx);
+
+    if (!ModuleOrErr) {
+      handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+        Diags.Report(diag::err_cannot_open_file) << F.Filename << EIB.message();
+      });
+      LinkModules.clear();
+      return true;
+    }
+    LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+                           F.Internalize, F.LinkFlags});
+  }
+
+  return false; // success
+}
+
 // Links each entry in LinkModules into our module.  Returns true on error.
 bool BackendConsumer::LinkInModules(llvm::Module *M, bool ShouldLinkFiles) {
-
   for (auto &LM : LinkModules) {
     assert(LM.Module && "LinkModule does not actually have a module");
 
@@ -257,37 +281,19 @@ bool BackendConsumer::LinkInModules(llvm::Module *M, bool ShouldLinkFiles) {
     CurLinkModule = LM.Module.get();
     bool Err;
 
-    auto DoLink = [&](auto &Mod) {
-      if (LM.Internalize) {
-        Err = Linker::linkModules(
-            *M, std::move(Mod), LM.LinkFlags,
-            [](llvm::Module &M, const llvm::StringSet<> &GVS) {
-              internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
-                return !GV.hasName() || (GVS.count(GV.getName()) == 0);
-              });
+    if (LM.Internalize) {
+      Err = Linker::linkModules(
+          *M, std::move(LM.Module), LM.LinkFlags,
+          [](llvm::Module &M, const llvm::StringSet<> &GVS) {
+            internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
+              return !GV.hasName() || (GVS.count(GV.getName()) == 0);
             });
-      } else
-        Err = Linker::linkModules(*M, std::move(Mod), LM.LinkFlags);
-    };
-
-    // Create a Clone to move to the linker, which preserves the original
-    // linking modules, allowing them to be linked again in the future
-    if (ClRelinkBuiltinBitcodePostop) {
-      // TODO: If CloneModule() is updated to support cloning of unmaterialized
-      // modules, we can remove this
-      if (Error E = CurLinkModule->materializeAll())
-        return false;
-
-      std::unique_ptr<llvm::Module> Clone = llvm::CloneModule(*LM.Module);
-
-      DoLink(Clone);
-    }
-    // Otherwise we can link (and clean up) the original modules
-    else {
-      DoLink(LM.Module);
-    }
+          });
+    } else
+      Err = Linker::linkModules(*M, std::move(LM.Module), LM.LinkFlags);
   }
 
+  LinkModules.clear();
   return false; // success
 }
 
@@ -350,7 +356,7 @@ void BackendConsumer::HandleTranslationUnit(ASTContext &C) {
   }
 
   // Link each LinkModule into our module.
-  if (LinkInModules(getModule()))
+  if (LinkInModules(getModule(), true))
     return;
 
   for (auto &F : getModule()->functions()) {
@@ -1037,8 +1043,9 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
   std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
       BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(),
       CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(),
-      CI.getTargetOpts(), CI.getLangOpts(), std::string(InFile),
-      std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo));
+      CI.getTargetOpts(), CI.getLangOpts(), CI.getFileManager(),
+      std::string(InFile), std::move(LinkModules), std::move(OS), *VMContext,
+      CoverageInfo));
   BEConsumer = Result.get();
 
   // Enable generating macro debug info only when debug info is not disabled and
@@ -1199,7 +1206,7 @@ void CodeGenAction::ExecuteAction() {
   BackendConsumer Result(BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(),
                          CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(),
                          CI.getCodeGenOpts(), CI.getTargetOpts(),
-                         CI.getLangOpts(), TheModule.get(),
+                         CI.getLangOpts(), CI.getFileManager(), TheModule.get(),
                          std::move(LinkModules), *VMContext, nullptr);
 
   // Link in each pending link module.
diff --git a/clang/lib/CodeGen/LinkInModulesPass.cpp b/clang/lib/CodeGen/LinkInModulesPass.cpp
index 6ce2b94c1db82c..929539cc8f3346 100644
--- a/clang/lib/CodeGen/LinkInModulesPass.cpp
+++ b/clang/lib/CodeGen/LinkInModulesPass.cpp
@@ -14,6 +14,10 @@
 #include "LinkInModulesPass.h"
 #include "BackendConsumer.h"
 
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+
 using namespace llvm;
 
 LinkInModulesPass::LinkInModulesPass(clang::BackendConsumer *BC,
@@ -21,9 +25,15 @@ LinkInModulesPass::LinkInModulesPass(clang::BackendConsumer *BC,
     : BC(BC), ShouldLinkFiles(ShouldLinkFiles) {}
 
 PreservedAnalyses LinkInModulesPass::run(Module &M, ModuleAnalysisManager &AM) {
+  if (!BC)
+    return PreservedAnalyses::all();
+
+  // Re-load bitcode modules from files
+  if (BC->ReloadModules(&M))
+    report_fatal_error("Bitcode module re-loading failed, aborted!");
 
-  if (BC && BC->LinkInModules(&M, ShouldLinkFiles))
-    report_fatal_error("Bitcode module linking failed, compilation aborted!");
+  if (BC->LinkInModules(&M, ShouldLinkFiles))
+    report_fatal_error("Bitcode module re-linking failed, aborted!");
 
   return PreservedAnalyses::all();
 }

>From b85e53a8ce0dcda3bd151739e269fb4794b1a88d Mon Sep 17 00:00:00 2001
From: Jacob Lambert <jacob.lambert at amd.com>
Date: Tue, 13 Feb 2024 17:35:51 -0800
Subject: [PATCH 2/2] Remove redundant default

---
 clang/lib/CodeGen/CodeGenAction.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index 88b6ce84b51105..ab08a875e7e9c1 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -356,7 +356,7 @@ void BackendConsumer::HandleTranslationUnit(ASTContext &C) {
   }
 
   // Link each LinkModule into our module.
-  if (LinkInModules(getModule(), true))
+  if (LinkInModules(getModule()))
     return;
 
   for (auto &F : getModule()->functions()) {



More information about the cfe-commits mailing list