[clang] 6d4ffbd - [clang][CodeGen] Shift relink option implementation away from module cloning (#81693)

via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 14 10:39:25 PST 2024


Author: Jacob Lambert
Date: 2024-02-14T10:39:21-08:00
New Revision: 6d4ffbdfa8ff90e4ee6081ad8dbb8ec24e982a02

URL: https://github.com/llvm/llvm-project/commit/6d4ffbdfa8ff90e4ee6081ad8dbb8ec24e982a02
DIFF: https://github.com/llvm/llvm-project/commit/6d4ffbdfa8ff90e4ee6081ad8dbb8ec24e982a02.diff

LOG: [clang][CodeGen] Shift relink option implementation away from module cloning (#81693)

We recently implemented a new option allowing relinking of bitcode
modules via the "-mllvm -relink-builtin-bitcode-postop"
option.

This implementation relied on llvm::CloneModule() in order to pass
copies to modules and preserve the original modules for later relinking.
However, cloning modules has been found to be prohibitively expensive,
significantly increasing compilation time for large bitcode libraries.

In this patch, we shift the relink option implementation to instead link
the original modules initially, and reload modules from the file system
if relinking is requested. This approach results in significantly
reduced overhead.

We accomplish this by creating a new ReloadModules() routine that can be
called from a BackendConsumer class, to mimic the behavior of
ASTConsumer's loadLinkModules(), but without access to the
CompilerInstance.

Because loading the bitcodes from the filesystem requires access to the
FileManager class, we also forward a reference to the CompilerInstance
class to the BackendConsumer. This mirrors what is already done for
several CompilerInstance members, such as TargetOptions and
CodeGenOptions.

Finally, we needed to add a const specifier to the
FileManager::getBufferForFile() routine to allow it to be called using
the const reference returned from CompilerInstance::getFileManager()

Added: 
    

Modified: 
    clang/include/clang/Basic/FileManager.h
    clang/lib/Basic/FileManager.cpp
    clang/lib/CodeGen/BackendConsumer.h
    clang/lib/CodeGen/CodeGenAction.cpp
    clang/lib/CodeGen/LinkInModulesPass.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index 997c17a0ffcfcc..2245fd78bfc9f0 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -283,7 +283,7 @@ class FileManager : public RefCountedBase<FileManager> {
                    bool RequiresNullTerminator = true);
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
   getBufferForFile(StringRef Filename, bool isVolatile = false,
-                   bool RequiresNullTerminator = true) {
+                   bool RequiresNullTerminator = true) const {
     return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile,
                                 RequiresNullTerminator);
   }
@@ -291,7 +291,7 @@ class FileManager : public RefCountedBase<FileManager> {
 private:
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
   getBufferForFileImpl(StringRef Filename, int64_t FileSize, bool isVolatile,
-                       bool RequiresNullTerminator);
+                       bool RequiresNullTerminator) const;
 
 public:
   /// Get the 'stat' information for the given \p Path.

diff  --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index 6097a27e429d66..cd520a6375e07e 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -547,7 +547,7 @@ FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile,
 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
 FileManager::getBufferForFileImpl(StringRef Filename, int64_t FileSize,
                                   bool isVolatile,
-                                  bool RequiresNullTerminator) {
+                                  bool RequiresNullTerminator) const {
   if (FileSystemOpts.WorkingDir.empty())
     return FS->getBufferForFile(Filename, FileSize, RequiresNullTerminator,
                                 isVolatile);

diff  --git a/clang/lib/CodeGen/BackendConsumer.h b/clang/lib/CodeGen/BackendConsumer.h
index 72a814cd43d738..fd0f1984d6c0f7 100644
--- a/clang/lib/CodeGen/BackendConsumer.h
+++ b/clang/lib/CodeGen/BackendConsumer.h
@@ -34,6 +34,7 @@ class BackendConsumer : public ASTConsumer {
   const CodeGenOptions &CodeGenOpts;
   const TargetOptions &TargetOpts;
   const LangOptions &LangOpts;
+  const FileManager &FileMgr;
   std::unique_ptr<raw_pwrite_stream> AsmOutStream;
   ASTContext *Context;
   IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;
@@ -74,8 +75,8 @@ class BackendConsumer : public ASTConsumer {
                   const HeaderSearchOptions &HeaderSearchOpts,
                   const PreprocessorOptions &PPOpts,
                   const CodeGenOptions &CodeGenOpts,
-                  const TargetOptions &TargetOpts,
-                  const LangOptions &LangOpts, const std::string &InFile,
+                  const TargetOptions &TargetOpts, const LangOptions &LangOpts,
+                  const FileManager &FileMgr, const std::string &InFile,
                   SmallVector<LinkModule, 4> LinkModules,
                   std::unique_ptr<raw_pwrite_stream> OS, llvm::LLVMContext &C,
                   CoverageSourceInfo *CoverageInfo = nullptr);
@@ -88,8 +89,8 @@ class BackendConsumer : public ASTConsumer {
                   const HeaderSearchOptions &HeaderSearchOpts,
                   const PreprocessorOptions &PPOpts,
                   const CodeGenOptions &CodeGenOpts,
-                  const TargetOptions &TargetOpts,
-                  const LangOptions &LangOpts, llvm::Module *Module,
+                  const TargetOptions &TargetOpts, const LangOptions &LangOpts,
+                  const FileManager &FileMgr, llvm::Module *Module,
                   SmallVector<LinkModule, 4> LinkModules, llvm::LLVMContext &C,
                   CoverageSourceInfo *CoverageInfo = nullptr);
 
@@ -111,10 +112,13 @@ class BackendConsumer : public ASTConsumer {
   void AssignInheritanceModel(CXXRecordDecl *RD) override;
   void HandleVTable(CXXRecordDecl *RD) override;
 
-
- // Links each entry in LinkModules into our module.  Returns true on error.
+  // Links each entry in LinkModules into our module.  Returns true on error.
   bool LinkInModules(llvm::Module *M, bool ShouldLinkFiles = true);
 
+  // Load a bitcode module from -mlink-builtin-bitcode option using
+  // methods from a BackendConsumer instead of CompilerInstance
+  bool ReloadModules(llvm::Module *M);
+
   /// Get the best possible source location to represent a diagnostic that
   /// may have associated debug info.
   const FullSourceLoc getBestLocationFromDebugLoc(

diff  --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index f8038497d90a7b..ab08a875e7e9c1 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -109,56 +109,52 @@ static void reportOptRecordError(Error E, DiagnosticsEngine &Diags,
       });
 }
 
-BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
-                                 IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
-                                 const HeaderSearchOptions &HeaderSearchOpts,
-                                 const PreprocessorOptions &PPOpts,
-                                 const CodeGenOptions &CodeGenOpts,
-                                 const TargetOptions &TargetOpts,
-                                 const LangOptions &LangOpts,
-                                 const std::string &InFile,
-                                 SmallVector<LinkModule, 4> LinkModules,
-                                 std::unique_ptr<raw_pwrite_stream> OS,
-                                 LLVMContext &C,
-                                 CoverageSourceInfo *CoverageInfo)
-  : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
-  CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
-  AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS),
-  LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
-  LLVMIRGenerationRefCount(0),
-  Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts,
-                        PPOpts, CodeGenOpts, C, CoverageInfo)),
-  LinkModules(std::move(LinkModules)) {
-    TimerIsEnabled = CodeGenOpts.TimePasses;
-    llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
-    llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
+BackendConsumer::BackendConsumer(
+    BackendAction Action, DiagnosticsEngine &Diags,
+    IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+    const HeaderSearchOptions &HeaderSearchOpts,
+    const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
+    const TargetOptions &TargetOpts, const LangOptions &LangOpts,
+    const FileManager &FileMgr, const std::string &InFile,
+    SmallVector<LinkModule, 4> LinkModules,
+    std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
+    CoverageSourceInfo *CoverageInfo)
+    : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
+      CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
+      FileMgr(FileMgr), AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS),
+      LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
+      LLVMIRGenerationRefCount(0),
+      Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts,
+                            PPOpts, CodeGenOpts, C, CoverageInfo)),
+      LinkModules(std::move(LinkModules)) {
+  TimerIsEnabled = CodeGenOpts.TimePasses;
+  llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
+  llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
 }
 
 // This constructor is used in installing an empty BackendConsumer
 // to use the clang diagnostic handler for IR input files. It avoids
 // initializing the OS field.
-BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
-                                 IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
-                                 const HeaderSearchOptions &HeaderSearchOpts,
-                                 const PreprocessorOptions &PPOpts,
-                                 const CodeGenOptions &CodeGenOpts,
-                                 const TargetOptions &TargetOpts,
-                                 const LangOptions &LangOpts,
-                                 llvm::Module *Module,
-                                 SmallVector<LinkModule, 4> LinkModules,
-                                 LLVMContext &C,
-                                 CoverageSourceInfo *CoverageInfo)
-  : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
-  CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
-  Context(nullptr), FS(VFS),
-  LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
-  LLVMIRGenerationRefCount(0),
-  Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts,
-                        PPOpts, CodeGenOpts, C, CoverageInfo)),
-  LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
-    TimerIsEnabled = CodeGenOpts.TimePasses;
-    llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
-    llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
+BackendConsumer::BackendConsumer(
+    BackendAction Action, DiagnosticsEngine &Diags,
+    IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+    const HeaderSearchOptions &HeaderSearchOpts,
+    const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
+    const TargetOptions &TargetOpts, const LangOptions &LangOpts,
+    const FileManager &FileMgr, llvm::Module *Module,
+    SmallVector<LinkModule, 4> LinkModules, LLVMContext &C,
+    CoverageSourceInfo *CoverageInfo)
+    : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
+      CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
+      FileMgr(FileMgr), Context(nullptr), FS(VFS),
+      LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
+      LLVMIRGenerationRefCount(0),
+      Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, PPOpts,
+                            CodeGenOpts, C, CoverageInfo)),
+      LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
+  TimerIsEnabled = CodeGenOpts.TimePasses;
+  llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
+  llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
 }
 
 llvm::Module* BackendConsumer::getModule() const {
@@ -233,9 +229,37 @@ void BackendConsumer::HandleInterestingDecl(DeclGroupRef D) {
     HandleTopLevelDecl(D);
 }
 
+bool BackendConsumer::ReloadModules(llvm::Module *M) {
+  for (const CodeGenOptions::BitcodeFileToLink &F :
+       CodeGenOpts.LinkBitcodeFiles) {
+    auto BCBuf = FileMgr.getBufferForFile(F.Filename);
+    if (!BCBuf) {
+      Diags.Report(diag::err_cannot_open_file)
+          << F.Filename << BCBuf.getError().message();
+      LinkModules.clear();
+      return true;
+    }
+
+    LLVMContext &Ctx = getModule()->getContext();
+    Expected<std::unique_ptr<llvm::Module>> ModuleOrErr =
+        getOwningLazyBitcodeModule(std::move(*BCBuf), Ctx);
+
+    if (!ModuleOrErr) {
+      handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+        Diags.Report(diag::err_cannot_open_file) << F.Filename << EIB.message();
+      });
+      LinkModules.clear();
+      return true;
+    }
+    LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+                           F.Internalize, F.LinkFlags});
+  }
+
+  return false; // success
+}
+
 // Links each entry in LinkModules into our module.  Returns true on error.
 bool BackendConsumer::LinkInModules(llvm::Module *M, bool ShouldLinkFiles) {
-
   for (auto &LM : LinkModules) {
     assert(LM.Module && "LinkModule does not actually have a module");
 
@@ -257,37 +281,19 @@ bool BackendConsumer::LinkInModules(llvm::Module *M, bool ShouldLinkFiles) {
     CurLinkModule = LM.Module.get();
     bool Err;
 
-    auto DoLink = [&](auto &Mod) {
-      if (LM.Internalize) {
-        Err = Linker::linkModules(
-            *M, std::move(Mod), LM.LinkFlags,
-            [](llvm::Module &M, const llvm::StringSet<> &GVS) {
-              internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
-                return !GV.hasName() || (GVS.count(GV.getName()) == 0);
-              });
+    if (LM.Internalize) {
+      Err = Linker::linkModules(
+          *M, std::move(LM.Module), LM.LinkFlags,
+          [](llvm::Module &M, const llvm::StringSet<> &GVS) {
+            internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
+              return !GV.hasName() || (GVS.count(GV.getName()) == 0);
             });
-      } else
-        Err = Linker::linkModules(*M, std::move(Mod), LM.LinkFlags);
-    };
-
-    // Create a Clone to move to the linker, which preserves the original
-    // linking modules, allowing them to be linked again in the future
-    if (ClRelinkBuiltinBitcodePostop) {
-      // TODO: If CloneModule() is updated to support cloning of unmaterialized
-      // modules, we can remove this
-      if (Error E = CurLinkModule->materializeAll())
-        return false;
-
-      std::unique_ptr<llvm::Module> Clone = llvm::CloneModule(*LM.Module);
-
-      DoLink(Clone);
-    }
-    // Otherwise we can link (and clean up) the original modules
-    else {
-      DoLink(LM.Module);
-    }
+          });
+    } else
+      Err = Linker::linkModules(*M, std::move(LM.Module), LM.LinkFlags);
   }
 
+  LinkModules.clear();
   return false; // success
 }
 
@@ -1037,8 +1043,9 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
   std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
       BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(),
       CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(),
-      CI.getTargetOpts(), CI.getLangOpts(), std::string(InFile),
-      std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo));
+      CI.getTargetOpts(), CI.getLangOpts(), CI.getFileManager(),
+      std::string(InFile), std::move(LinkModules), std::move(OS), *VMContext,
+      CoverageInfo));
   BEConsumer = Result.get();
 
   // Enable generating macro debug info only when debug info is not disabled and
@@ -1199,7 +1206,7 @@ void CodeGenAction::ExecuteAction() {
   BackendConsumer Result(BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(),
                          CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(),
                          CI.getCodeGenOpts(), CI.getTargetOpts(),
-                         CI.getLangOpts(), TheModule.get(),
+                         CI.getLangOpts(), CI.getFileManager(), TheModule.get(),
                          std::move(LinkModules), *VMContext, nullptr);
 
   // Link in each pending link module.

diff  --git a/clang/lib/CodeGen/LinkInModulesPass.cpp b/clang/lib/CodeGen/LinkInModulesPass.cpp
index 6ce2b94c1db82c..929539cc8f3346 100644
--- a/clang/lib/CodeGen/LinkInModulesPass.cpp
+++ b/clang/lib/CodeGen/LinkInModulesPass.cpp
@@ -14,6 +14,10 @@
 #include "LinkInModulesPass.h"
 #include "BackendConsumer.h"
 
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+
 using namespace llvm;
 
 LinkInModulesPass::LinkInModulesPass(clang::BackendConsumer *BC,
@@ -21,9 +25,15 @@ LinkInModulesPass::LinkInModulesPass(clang::BackendConsumer *BC,
     : BC(BC), ShouldLinkFiles(ShouldLinkFiles) {}
 
 PreservedAnalyses LinkInModulesPass::run(Module &M, ModuleAnalysisManager &AM) {
+  if (!BC)
+    return PreservedAnalyses::all();
+
+  // Re-load bitcode modules from files
+  if (BC->ReloadModules(&M))
+    report_fatal_error("Bitcode module re-loading failed, aborted!");
 
-  if (BC && BC->LinkInModules(&M, ShouldLinkFiles))
-    report_fatal_error("Bitcode module linking failed, compilation aborted!");
+  if (BC->LinkInModules(&M, ShouldLinkFiles))
+    report_fatal_error("Bitcode module re-linking failed, aborted!");
 
   return PreservedAnalyses::all();
 }


        


More information about the cfe-commits mailing list