[clang] [CodeGen] Implement post-opt linking option for builtin bitocdes (PR #69371)

Jacob Lambert via cfe-commits cfe-commits at lists.llvm.org
Wed Nov 15 20:26:53 PST 2023


================
@@ -48,428 +49,365 @@
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 
-#include <memory>
 #include <optional>
 using namespace clang;
 using namespace llvm;
 
 #define DEBUG_TYPE "codegenaction"
 
 namespace clang {
-  class BackendConsumer;
-  class ClangDiagnosticHandler final : public DiagnosticHandler {
-  public:
-    ClangDiagnosticHandler(const CodeGenOptions &CGOpts, BackendConsumer *BCon)
-        : CodeGenOpts(CGOpts), BackendCon(BCon) {}
+class BackendConsumer;
+class ClangDiagnosticHandler final : public DiagnosticHandler {
+public:
+  ClangDiagnosticHandler(const CodeGenOptions &CGOpts, BackendConsumer *BCon)
+      : CodeGenOpts(CGOpts), BackendCon(BCon) {}
 
-    bool handleDiagnostics(const DiagnosticInfo &DI) override;
+  bool handleDiagnostics(const DiagnosticInfo &DI) override;
 
-    bool isAnalysisRemarkEnabled(StringRef PassName) const override {
-      return CodeGenOpts.OptimizationRemarkAnalysis.patternMatches(PassName);
-    }
-    bool isMissedOptRemarkEnabled(StringRef PassName) const override {
-      return CodeGenOpts.OptimizationRemarkMissed.patternMatches(PassName);
-    }
-    bool isPassedOptRemarkEnabled(StringRef PassName) const override {
-      return CodeGenOpts.OptimizationRemark.patternMatches(PassName);
-    }
+  bool isAnalysisRemarkEnabled(StringRef PassName) const override {
+    return CodeGenOpts.OptimizationRemarkAnalysis.patternMatches(PassName);
+  }
+  bool isMissedOptRemarkEnabled(StringRef PassName) const override {
+    return CodeGenOpts.OptimizationRemarkMissed.patternMatches(PassName);
+  }
+  bool isPassedOptRemarkEnabled(StringRef PassName) const override {
+    return CodeGenOpts.OptimizationRemark.patternMatches(PassName);
+  }
 
-    bool isAnyRemarkEnabled() const override {
-      return CodeGenOpts.OptimizationRemarkAnalysis.hasValidPattern() ||
-             CodeGenOpts.OptimizationRemarkMissed.hasValidPattern() ||
-             CodeGenOpts.OptimizationRemark.hasValidPattern();
-    }
+  bool isAnyRemarkEnabled() const override {
+    return CodeGenOpts.OptimizationRemarkAnalysis.hasValidPattern() ||
+           CodeGenOpts.OptimizationRemarkMissed.hasValidPattern() ||
+           CodeGenOpts.OptimizationRemark.hasValidPattern();
+  }
 
-  private:
-    const CodeGenOptions &CodeGenOpts;
-    BackendConsumer *BackendCon;
-  };
+private:
+  const CodeGenOptions &CodeGenOpts;
+  BackendConsumer *BackendCon;
+};
+
+static void reportOptRecordError(Error E, DiagnosticsEngine &Diags,
+                                 const CodeGenOptions &CodeGenOpts) {
+  handleAllErrors(
+      std::move(E),
+    [&](const LLVMRemarkSetupFileError &E) {
+        Diags.Report(diag::err_cannot_open_file)
+            << CodeGenOpts.OptRecordFile << E.message();
+      },
+    [&](const LLVMRemarkSetupPatternError &E) {
+        Diags.Report(diag::err_drv_optimization_remark_pattern)
+            << E.message() << CodeGenOpts.OptRecordPasses;
+      },
+    [&](const LLVMRemarkSetupFormatError &E) {
+        Diags.Report(diag::err_drv_optimization_remark_format)
+            << CodeGenOpts.OptRecordFormat;
+      });
+}
 
-  static void reportOptRecordError(Error E, DiagnosticsEngine &Diags,
-                                   const CodeGenOptions &CodeGenOpts) {
-    handleAllErrors(
-        std::move(E),
-      [&](const LLVMRemarkSetupFileError &E) {
-          Diags.Report(diag::err_cannot_open_file)
-              << CodeGenOpts.OptRecordFile << E.message();
-        },
-      [&](const LLVMRemarkSetupPatternError &E) {
-          Diags.Report(diag::err_drv_optimization_remark_pattern)
-              << E.message() << CodeGenOpts.OptRecordPasses;
-        },
-      [&](const LLVMRemarkSetupFormatError &E) {
-          Diags.Report(diag::err_drv_optimization_remark_format)
-              << CodeGenOpts.OptRecordFormat;
-        });
-    }
+BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+                                 IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+                                 const HeaderSearchOptions &HeaderSearchOpts,
+                                 const PreprocessorOptions &PPOpts,
+                                 const CodeGenOptions &CodeGenOpts,
+                                 const TargetOptions &TargetOpts,
+                                 const LangOptions &LangOpts,
+                                 const std::string &InFile,
+                                 SmallVector<LinkModule, 4> LinkModules,
+                                 std::unique_ptr<raw_pwrite_stream> OS,
+                                 LLVMContext &C,
+                                 CoverageSourceInfo *CoverageInfo)
+  : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
+  CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
+  AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS),
+  LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
+  LLVMIRGenerationRefCount(0),
+  Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts,
+                        PPOpts, CodeGenOpts, C, CoverageInfo)),
+  LinkModules(std::move(LinkModules)) {
+    TimerIsEnabled = CodeGenOpts.TimePasses;
+    llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
+    llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
+}
 
-  class BackendConsumer : public ASTConsumer {
-    using LinkModule = CodeGenAction::LinkModule;
-
-    virtual void anchor();
-    DiagnosticsEngine &Diags;
-    BackendAction Action;
-    const HeaderSearchOptions &HeaderSearchOpts;
-    const CodeGenOptions &CodeGenOpts;
-    const TargetOptions &TargetOpts;
-    const LangOptions &LangOpts;
-    std::unique_ptr<raw_pwrite_stream> AsmOutStream;
-    ASTContext *Context;
-    IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;
-
-    Timer LLVMIRGeneration;
-    unsigned LLVMIRGenerationRefCount;
-
-    /// True if we've finished generating IR. This prevents us from generating
-    /// additional LLVM IR after emitting output in HandleTranslationUnit. This
-    /// can happen when Clang plugins trigger additional AST deserialization.
-    bool IRGenFinished = false;
-
-    bool TimerIsEnabled = false;
-
-    std::unique_ptr<CodeGenerator> Gen;
-
-    SmallVector<LinkModule, 4> LinkModules;
-
-    // A map from mangled names to their function's source location, used for
-    // backend diagnostics as the Clang AST may be unavailable. We actually use
-    // the mangled name's hash as the key because mangled names can be very
-    // long and take up lots of space. Using a hash can cause name collision,
-    // but that is rare and the consequences are pointing to a wrong source
-    // location which is not severe. This is a vector instead of an actual map
-    // because we optimize for time building this map rather than time
-    // retrieving an entry, as backend diagnostics are uncommon.
-    std::vector<std::pair<llvm::hash_code, FullSourceLoc>>
-        ManglingFullSourceLocs;
-
-    // This is here so that the diagnostic printer knows the module a diagnostic
-    // refers to.
-    llvm::Module *CurLinkModule = nullptr;
-
-  public:
-    BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
-                    IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
-                    const HeaderSearchOptions &HeaderSearchOpts,
-                    const PreprocessorOptions &PPOpts,
-                    const CodeGenOptions &CodeGenOpts,
-                    const TargetOptions &TargetOpts,
-                    const LangOptions &LangOpts, const std::string &InFile,
-                    SmallVector<LinkModule, 4> LinkModules,
-                    std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
-                    CoverageSourceInfo *CoverageInfo = nullptr)
-        : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
-          CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
-          AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS),
-          LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
-          LLVMIRGenerationRefCount(0),
-          Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts,
-                                PPOpts, CodeGenOpts, C, CoverageInfo)),
-          LinkModules(std::move(LinkModules)) {
-      TimerIsEnabled = CodeGenOpts.TimePasses;
-      llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
-      llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
-    }
+// This constructor is used in installing an empty BackendConsumer
+// to use the clang diagnostic handler for IR input files. It avoids
+// initializing the OS field.
+BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+                                 IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+                                 const HeaderSearchOptions &HeaderSearchOpts,
+                                 const PreprocessorOptions &PPOpts,
+                                 const CodeGenOptions &CodeGenOpts,
+                                 const TargetOptions &TargetOpts,
+                                 const LangOptions &LangOpts,
+                                 llvm::Module *Module,
+                                 SmallVector<LinkModule, 4> LinkModules,
+                                 LLVMContext &C,
+                                 CoverageSourceInfo *CoverageInfo)
+  : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
+  CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
+  Context(nullptr), FS(VFS),
+  LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
+  LLVMIRGenerationRefCount(0),
+  Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts,
+                        PPOpts, CodeGenOpts, C, CoverageInfo)),
+  LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
+    TimerIsEnabled = CodeGenOpts.TimePasses;
+    llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
+    llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
+}
 
-    // This constructor is used in installing an empty BackendConsumer
-    // to use the clang diagnostic handler for IR input files. It avoids
-    // initializing the OS field.
-    BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
-                    IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
-                    const HeaderSearchOptions &HeaderSearchOpts,
-                    const PreprocessorOptions &PPOpts,
-                    const CodeGenOptions &CodeGenOpts,
-                    const TargetOptions &TargetOpts,
-                    const LangOptions &LangOpts, llvm::Module *Module,
-                    SmallVector<LinkModule, 4> LinkModules, LLVMContext &C,
-                    CoverageSourceInfo *CoverageInfo = nullptr)
-        : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
-          CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
-          Context(nullptr), FS(VFS),
-          LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
-          LLVMIRGenerationRefCount(0),
-          Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts,
-                                PPOpts, CodeGenOpts, C, CoverageInfo)),
-          LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
-      TimerIsEnabled = CodeGenOpts.TimePasses;
-      llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
-      llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
-    }
-    llvm::Module *getModule() const { return Gen->GetModule(); }
-    std::unique_ptr<llvm::Module> takeModule() {
-      return std::unique_ptr<llvm::Module>(Gen->ReleaseModule());
-    }
+llvm::Module* BackendConsumer::getModule() const {
+  return Gen->GetModule();
+}
 
-    CodeGenerator *getCodeGenerator() { return Gen.get(); }
+std::unique_ptr<llvm::Module> BackendConsumer::takeModule() {
+  return std::unique_ptr<llvm::Module>(Gen->ReleaseModule());
+}
 
-    void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override {
-      Gen->HandleCXXStaticMemberVarInstantiation(VD);
-    }
+CodeGenerator* BackendConsumer::getCodeGenerator() {
+  return Gen.get();
+}
 
-    void Initialize(ASTContext &Ctx) override {
-      assert(!Context && "initialized multiple times");
+void BackendConsumer::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) {
+  Gen->HandleCXXStaticMemberVarInstantiation(VD);
+}
 
-      Context = &Ctx;
+void BackendConsumer::Initialize(ASTContext &Ctx) {
+  assert(!Context && "initialized multiple times");
 
-      if (TimerIsEnabled)
-        LLVMIRGeneration.startTimer();
+  Context = &Ctx;
 
-      Gen->Initialize(Ctx);
+  if (TimerIsEnabled)
+    LLVMIRGeneration.startTimer();
 
-      if (TimerIsEnabled)
-        LLVMIRGeneration.stopTimer();
-    }
+  Gen->Initialize(Ctx);
 
-    bool HandleTopLevelDecl(DeclGroupRef D) override {
-      PrettyStackTraceDecl CrashInfo(*D.begin(), SourceLocation(),
-                                     Context->getSourceManager(),
-                                     "LLVM IR generation of declaration");
+  if (TimerIsEnabled)
+    LLVMIRGeneration.stopTimer();
+}
 
-      // Recurse.
-      if (TimerIsEnabled) {
-        LLVMIRGenerationRefCount += 1;
-        if (LLVMIRGenerationRefCount == 1)
-          LLVMIRGeneration.startTimer();
-      }
+bool BackendConsumer::HandleTopLevelDecl(DeclGroupRef D) {
+  PrettyStackTraceDecl CrashInfo(*D.begin(), SourceLocation(),
+                                 Context->getSourceManager(),
+                                 "LLVM IR generation of declaration");
 
-      Gen->HandleTopLevelDecl(D);
+  // Recurse.
+  if (TimerIsEnabled) {
+    LLVMIRGenerationRefCount += 1;
+    if (LLVMIRGenerationRefCount == 1)
+      LLVMIRGeneration.startTimer();
+  }
 
-      if (TimerIsEnabled) {
-        LLVMIRGenerationRefCount -= 1;
-        if (LLVMIRGenerationRefCount == 0)
-          LLVMIRGeneration.stopTimer();
-      }
+  Gen->HandleTopLevelDecl(D);
 
-      return true;
-    }
+  if (TimerIsEnabled) {
+    LLVMIRGenerationRefCount -= 1;
+    if (LLVMIRGenerationRefCount == 0)
+      LLVMIRGeneration.stopTimer();
+  }
 
-    void HandleInlineFunctionDefinition(FunctionDecl *D) override {
-      PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
-                                     Context->getSourceManager(),
-                                     "LLVM IR generation of inline function");
-      if (TimerIsEnabled)
-        LLVMIRGeneration.startTimer();
+  return true;
+}
 
-      Gen->HandleInlineFunctionDefinition(D);
+void BackendConsumer::HandleInlineFunctionDefinition(FunctionDecl *D) {
+  PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
+                                 Context->getSourceManager(),
+                                 "LLVM IR generation of inline function");
+  if (TimerIsEnabled)
+    LLVMIRGeneration.startTimer();
 
-      if (TimerIsEnabled)
-        LLVMIRGeneration.stopTimer();
-    }
+  Gen->HandleInlineFunctionDefinition(D);
 
-    void HandleInterestingDecl(DeclGroupRef D) override {
-      // Ignore interesting decls from the AST reader after IRGen is finished.
-      if (!IRGenFinished)
-        HandleTopLevelDecl(D);
-    }
+  if (TimerIsEnabled)
+    LLVMIRGeneration.stopTimer();
+}
 
-    // Links each entry in LinkModules into our module.  Returns true on error.
-    bool LinkInModules(llvm::Module *M) {
-      for (auto &LM : LinkModules) {
-        assert(LM.Module && "LinkModule does not actually have a module");
-        if (LM.PropagateAttrs)
-          for (Function &F : *LM.Module) {
-            // Skip intrinsics. Keep consistent with how intrinsics are created
-            // in LLVM IR.
-            if (F.isIntrinsic())
-              continue;
-            CodeGen::mergeDefaultFunctionDefinitionAttributes(
-                F, CodeGenOpts, LangOpts, TargetOpts, LM.Internalize);
-          }
-
-        CurLinkModule = LM.Module.get();
-
-        bool Err;
-        if (LM.Internalize) {
-          Err = Linker::linkModules(
-              *M, std::move(LM.Module), LM.LinkFlags,
-              [](llvm::Module &M, const llvm::StringSet<> &GVS) {
-                internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
-                  return !GV.hasName() || (GVS.count(GV.getName()) == 0);
-                });
-              });
-        } else {
-          Err = Linker::linkModules(*M, std::move(LM.Module), LM.LinkFlags);
-        }
+void BackendConsumer::HandleInterestingDecl(DeclGroupRef D) {
+  // Ignore interesting decls from the AST reader after IRGen is finished.
+  if (!IRGenFinished)
+    HandleTopLevelDecl(D);
+}
 
-        if (Err)
-          return true;
+// Links each entry in LinkModules into our module.  Returns true on error.
+bool BackendConsumer::LinkInModules(llvm::Module *M, bool ShouldLinkFiles) {
+
+  for (auto &LM : LinkModules) {
+    assert(LM.Module && "LinkModule does not actually have a module");
+
+    // If ShouldLinkFiles is not set, skip files added via the
+    // -mlink-bitcode-files, only linking -mlink-builtin-bitcode
+    if (!LM.Internalize && !ShouldLinkFiles)
+      continue;
+
+    if (LM.PropagateAttrs)
+      for (Function &F : *LM.Module) {
+        // Skip intrinsics. Keep consistent with how intrinsics are created
+        // in LLVM IR.
+        if (F.isIntrinsic())
+          continue;
+        CodeGen::mergeDefaultFunctionDefinitionAttributes(
+          F, CodeGenOpts, LangOpts, TargetOpts, LM.Internalize);
       }
-      LinkModules.clear();
-      return false; // success
-    }
 
-    void HandleTranslationUnit(ASTContext &C) override {
-      {
-        llvm::TimeTraceScope TimeScope("Frontend");
-        PrettyStackTraceString CrashInfo("Per-file LLVM IR generation");
-        if (TimerIsEnabled) {
-          LLVMIRGenerationRefCount += 1;
-          if (LLVMIRGenerationRefCount == 1)
-            LLVMIRGeneration.startTimer();
-        }
+    CurLinkModule = LM.Module.get();
+
+    // TODO: If CloneModule() is updated to support cloning of unmaterialized
+    // modules, we can remove this
+    bool Err;
+    if (Error E = CurLinkModule->materializeAll())
+      return false;
+
+    // Create a Clone to move to the linker, which preserves the original
+    // linking modules, allowing them to be linked again in the future
----------------
lamb-j wrote:

Good question, we definitely did not expect module cloning to have significant performance impacts for compilation.

One thing we considered was to only do module cloning if we expect to need the clones for a second link step. That may address your performance issue. I'll draft up a patch and link it here.

https://github.com/llvm/llvm-project/pull/69371


More information about the cfe-commits mailing list