[flang-commits] [clang-tools-extra] [llvm] [clang] [libc] [libcxx] [compiler-rt] [flang] [clang][FatLTO] Avoid UnifiedLTO until it can support WPD/CFI (PR #79061)

Paul Kirth via flang-commits flang-commits at lists.llvm.org
Tue Jan 23 10:18:55 PST 2024


https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/79061

>From 2e0cd6077a756fa28023d907045e6e53c50ec732 Mon Sep 17 00:00:00 2001
From: Paul Kirth <pk1574 at gmail.com>
Date: Mon, 22 Jan 2024 21:48:16 +0000
Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 clang/lib/CodeGen/BackendUtil.cpp             |  9 ++--
 clang/lib/Driver/ToolChains/Clang.cpp         |  4 +-
 clang/lib/Frontend/CompilerInvocation.cpp     | 14 -------
 clang/test/CodeGen/fat-lto-objects.c          | 42 +++++++++----------
 clang/test/Driver/fat-lto-objects.c           | 12 ------
 llvm/docs/FatLTO.rst                          |  4 +-
 llvm/include/llvm/Passes/PassBuilder.h        |  3 +-
 .../llvm/Transforms/IPO/EmbedBitcodePass.h    | 16 ++++++-
 llvm/lib/Passes/PassBuilder.cpp               | 20 +++++++++
 llvm/lib/Passes/PassBuilderPipelines.cpp      | 37 +++++++++-------
 llvm/lib/Passes/PassRegistry.def              |  5 ++-
 llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp  | 12 +++++-
 llvm/test/CodeGen/X86/fat-lto-section.ll      |  2 +-
 llvm/test/Transforms/EmbedBitcode/embed.ll    |  3 ++
 14 files changed, 107 insertions(+), 76 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index a6142d99f3b688d..9369c8828231421 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1001,8 +1001,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
     }
 
     if (CodeGenOpts.FatLTO) {
-      assert(CodeGenOpts.UnifiedLTO && "FatLTO requires UnifiedLTO");
-      MPM.addPass(PB.buildFatLTODefaultPipeline(Level));
+      MPM.addPass(PB.buildFatLTODefaultPipeline(
+          Level, PrepareForThinLTO,
+          PrepareForThinLTO || shouldEmitRegularLTOSummary()));
     } else if (PrepareForThinLTO) {
       MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(Level));
     } else if (PrepareForLTO) {
@@ -1046,6 +1047,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
         MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
                                     /*EmitLTOSummary=*/true));
       }
+
     } else {
       // Emit a module summary by default for Regular LTO except for ld64
       // targets
@@ -1073,8 +1075,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
     if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
       TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
                                uint32_t(CodeGenOpts.EnableSplitLTOUnit));
-    // FatLTO always means UnifiedLTO
-    if (!TheModule->getModuleFlag("UnifiedLTO"))
+    if (CodeGenOpts.UnifiedLTO && !TheModule->getModuleFlag("UnifiedLTO"))
       TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
   }
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 2f33943de45c5da..928dca653dabdc1 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4854,9 +4854,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   bool UnifiedLTO = false;
   if (IsUsingLTO) {
     UnifiedLTO = Args.hasFlag(options::OPT_funified_lto,
-                              options::OPT_fno_unified_lto, Triple.isPS()) ||
-                 Args.hasFlag(options::OPT_ffat_lto_objects,
-                              options::OPT_fno_fat_lto_objects, false);
+                              options::OPT_fno_unified_lto, Triple.isPS());
     if (UnifiedLTO)
       CmdArgs.push_back("-funified-lto");
   }
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 7edea7798af1ef0..feb4de2084b830b 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1861,20 +1861,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
     if (Args.hasArg(OPT_funified_lto))
       Opts.PrepareForThinLTO = true;
   }
-  if (Arg *A = Args.getLastArg(options::OPT_ffat_lto_objects,
-                               options::OPT_fno_fat_lto_objects)) {
-    if (A->getOption().matches(options::OPT_ffat_lto_objects)) {
-      if (Arg *Uni = Args.getLastArg(options::OPT_funified_lto,
-                                     options::OPT_fno_unified_lto)) {
-        if (Uni->getOption().matches(options::OPT_fno_unified_lto))
-          Diags.Report(diag::err_drv_incompatible_options)
-              << A->getAsString(Args) << "-fno-unified-lto";
-      } else
-        Diags.Report(diag::err_drv_argument_only_allowed_with)
-            << A->getAsString(Args) << "-funified-lto";
-    }
-  }
-
   if (Arg *A = Args.getLastArg(OPT_fthinlto_index_EQ)) {
     if (IK.getLanguage() != Language::LLVM_IR)
       Diags.Report(diag::err_drv_argument_only_allowed_with)
diff --git a/clang/test/CodeGen/fat-lto-objects.c b/clang/test/CodeGen/fat-lto-objects.c
index 5c8ad1fd93c4b38..e3674d51c7f442b 100644
--- a/clang/test/CodeGen/fat-lto-objects.c
+++ b/clang/test/CodeGen/fat-lto-objects.c
@@ -1,46 +1,46 @@
 // REQUIRES: x86-registered-target
 
-// RUN: not %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -fsplit-lto-unit -emit-llvm < %s 2>&1 | FileCheck %s --check-prefixes=NO-UNIFIED
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -fsplit-lto-unit -emit-llvm < %s  | FileCheck %s --check-prefixes=FULL,SPLIT
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=FULL,SPLIT
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -funified-lto -ffat-lto-objects -fsplit-lto-unit -emit-llvm < %s  | FileCheck %s --check-prefixes=FULL,SPLIT,UNIFIED
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -funified-lto -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=FULL,NOSPLIT,UNIFIED
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -fsplit-lto-unit -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=THIN,SPLIT
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=THIN,NOSPLIT
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -funified-lto -fsplit-lto-unit -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=THIN,SPLIT,UNIFIED
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -funified-lto -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=THIN,NOSPLIT,UNIFIED
-
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -funified-lto -ffat-lto-objects -fsplit-lto-unit -emit-obj < %s -o %t.full.split.o
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -fsplit-lto-unit -emit-obj < %s -o %t.full.split.o
 // RUN: llvm-readelf -S %t.full.split.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.full.split.bc %t.full.split.o
-// RUN: llvm-dis %t.full.split.bc -o - | FileCheck %s --check-prefixes=THIN,SPLIT,UNIFIED
+// RUN: llvm-dis %t.full.split.bc -o - | FileCheck %s --check-prefixes=FULL,SPLIT,NOUNIFIED
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -funified-lto -ffat-lto-objects -emit-obj < %s -o %t.full.nosplit.o
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -emit-obj < %s -o %t.full.nosplit.o
 // RUN: llvm-readelf -S %t.full.nosplit.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.full.nosplit.bc %t.full.nosplit.o
-// RUN: llvm-dis %t.full.nosplit.bc -o - | FileCheck %s --check-prefixes=THIN,NOSPLIT,UNIFIED
+// RUN: llvm-dis %t.full.nosplit.bc -o - | FileCheck %s --check-prefixes=FULL,NOSPLIT,NOUNIFIED
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -funified-lto -fsplit-lto-unit -ffat-lto-objects -emit-obj < %s -o %t.thin.split.o
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -fsplit-lto-unit -ffat-lto-objects -emit-obj < %s -o %t.thin.split.o
 // RUN: llvm-readelf -S %t.thin.split.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.thin.split.bc %t.thin.split.o
-// RUN: llvm-dis %t.thin.split.bc -o - | FileCheck %s --check-prefixes=THIN,SPLIT,UNIFIED
+// RUN: llvm-dis %t.thin.split.bc -o - | FileCheck %s --check-prefixes=THIN,SPLIT,NOUNIFIED
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -funified-lto -ffat-lto-objects -emit-obj < %s -o %t.thin.nosplit.o
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -ffat-lto-objects -emit-obj < %s -o %t.thin.nosplit.o
 // RUN: llvm-readelf -S %t.thin.nosplit.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.thin.nosplit.bc %t.thin.nosplit.o
-// RUN: llvm-dis %t.thin.nosplit.bc -o -  | FileCheck %s --check-prefixes=THIN,NOSPLIT,UNIFIED
+// RUN: llvm-dis %t.thin.nosplit.bc -o -  | FileCheck %s --check-prefixes=THIN,NOSPLIT,NOUNIFIED
+
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -funified-lto -ffat-lto-objects -emit-obj < %s -o %t.unified.o
+// RUN: llvm-readelf -S %t.unified.o | FileCheck %s --check-prefixes=ELF
+// RUN: llvm-objcopy --dump-section=.llvm.lto=%t.unified.bc %t.unified.o
+// RUN: llvm-dis %t.unified.bc -o - | FileCheck %s --check-prefixes=THIN,NOSPLIT,UNIFIED
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -funified-lto -ffat-lto-objects -fsplit-lto-unit -S < %s -o - \
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -fsplit-lto-unit -S < %s -o - \
 // RUN: | FileCheck %s --check-prefixes=ASM
 
+<<<<<<< HEAD
 /// Be sure we enable split LTO units correctly under -ffat-lto-objects.
 //   SPLIT: ![[#]] = !{i32 1, !"EnableSplitLTOUnit", i32 1}
 // NOSPLIT: ![[#]] = !{i32 1, !"EnableSplitLTOUnit", i32 0}
 
-// FULL-NOT: ![[#]] = !{i32 1, !"ThinLTO", i32 0}
-// THIN-NOT: ![[#]] = !{i32 1, !"ThinLTO", i32 0}
-
-/// FatLTO always uses UnifiedLTO. It's an error if they aren't set together
-//    UNIFIED: ![[#]] = !{i32 1, !"UnifiedLTO", i32 1}
-// NO-UNIFIED: error: invalid argument '-ffat-lto-objects' only allowed with '-funified-lto'
+// UNIFIED: ![[#]] = !{i32 1, !"UnifiedLTO", i32 1}
+// NOUNIFIED-NOT: ![[#]] = !{i32 1, !"UnifiedLTO", i32 1}
 
 // ELF: .llvm.lto
 
diff --git a/clang/test/Driver/fat-lto-objects.c b/clang/test/Driver/fat-lto-objects.c
index e02359db3f0ae0d..887c33fa76d71f5 100644
--- a/clang/test/Driver/fat-lto-objects.c
+++ b/clang/test/Driver/fat-lto-objects.c
@@ -1,6 +1,5 @@
 // RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -c 2>&1 | FileCheck %s -check-prefix=CHECK-CC
 // CHECK-CC: -cc1
-// CHECK-CC-SAME: -funified-lto
 // CHECK-CC-SAME: -emit-obj
 // CHECK-CC-SAME: -ffat-lto-objects
 
@@ -16,7 +15,6 @@
 // RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -S 2>&1 | FileCheck %s -check-prefix=CHECK-CC-S-LTO
 // RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -S -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-CC-S-LTO
 // CHECK-CC-S-LTO: -cc1
-// CHECK-CC-S-LTO-SAME: -funified-lto
 // CHECK-CC-S-LTO-SAME: -emit-llvm
 // CHECK-CC-S-LTO-SAME: -ffat-lto-objects
 
@@ -34,13 +32,3 @@
 // RUN:   -fuse-ld=lld -fno-lto -ffat-lto-objects -### 2>&1 | FileCheck --check-prefix=NOLTO %s
 // LTO: "--fat-lto-objects"
 // NOLTO-NOT: "--fat-lto-objects"
-
-/// Make sure that incompatible options emit the correct diagnostics, since -ffat-lto-objects requires -funified-lto
-// RUN: %clang -cc1 -triple=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -funified-lto -emit-llvm-only %s  2>&1 | FileCheck %s -check-prefix=UNIFIED --allow-empty
-// UNIFIED-NOT: error:
-
-// RUN: not %clang -cc1 -triple=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -emit-llvm-only %s  2>&1 | FileCheck %s -check-prefix=MISSING_UNIFIED
-// MISSING_UNIFIED: error: invalid argument '-ffat-lto-objects' only allowed with '-funified-lto'
-
-// RUN: not %clang -cc1 -triple=x86_64-unknown-linux-gnu -flto -fno-unified-lto -ffat-lto-objects -emit-llvm-only %s  2>&1 | FileCheck %s -check-prefix=NO-UNIFIED
-// NO-UNIFIED: error: the combination of '-ffat-lto-objects' and '-fno-unified-lto' is incompatible
diff --git a/llvm/docs/FatLTO.rst b/llvm/docs/FatLTO.rst
index 76b849b16fc9679..5864944332fc074 100644
--- a/llvm/docs/FatLTO.rst
+++ b/llvm/docs/FatLTO.rst
@@ -29,9 +29,9 @@ Overview
 Within LLVM, FatLTO is supported by choosing the ``FatLTODefaultPipeline``.
 This pipeline will:
 
-#) Run the pre-link UnifiedLTO pipeline on the current module.
+#) Run the pre-link (Thin)LTO pipeline on the current module.
 #) Embed the pre-link bitcode in a special ``.llvm.lto`` section.
-#) Finish optimizing the module using the post-link ThinLTO pipeline.
+#) Finish optimizing the module using the ModuleOptimization pipeline.
 #) Emit the object file, including the new ``.llvm.lto`` section.
 
 .. NOTE
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 33cf8af87381fb2..bb1d9cd82228879 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -255,7 +255,8 @@ class PassBuilder {
   /// separately to avoid any inconsistencies with an ad-hoc pipeline that tries
   /// to approximate the PerModuleDefaultPipeline from the pre-link LTO
   /// pipelines.
-  ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level);
+  ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level,
+                                               bool ThinLTO, bool EmitSummary);
 
   /// Build a pre-link, ThinLTO-targeting default optimization pipeline to
   /// a pass manager.
diff --git a/llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h b/llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h
index 2bb7d5f1fcf1493..12bf0dd6581ce3d 100644
--- a/llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h
+++ b/llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h
@@ -24,13 +24,25 @@ namespace llvm {
 class Module;
 class Pass;
 
+struct EmbedBitcodeOptions {
+  EmbedBitcodeOptions() : EmbedBitcodeOptions(false, false) {}
+  EmbedBitcodeOptions(bool IsThinLTO, bool EmitLTOSummary)
+      : IsThinLTO(IsThinLTO), EmitLTOSummary(EmitLTOSummary) {}
+  bool IsThinLTO;
+  bool EmitLTOSummary;
+};
+
 /// Pass embeds a copy of the module optimized with the provided pass pipeline
 /// into a global variable.
 class EmbedBitcodePass : public PassInfoMixin<EmbedBitcodePass> {
-  ModulePassManager MPM;
+  bool IsThinLTO;
+  bool EmitLTOSummary;
 
 public:
-  EmbedBitcodePass() {}
+  EmbedBitcodePass(EmbedBitcodeOptions Opts)
+      : EmbedBitcodePass(Opts.IsThinLTO, Opts.EmitLTOSummary) {}
+  EmbedBitcodePass(bool IsThinLTO, bool EmitLTOSummary)
+      : IsThinLTO(IsThinLTO), EmitLTOSummary(EmitLTOSummary) {}
 
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
 
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 19fb136f37563a8..000594f0e7f4b51 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -752,6 +752,26 @@ Expected<HWAddressSanitizerOptions> parseHWASanPassOptions(StringRef Params) {
   return Result;
 }
 
+Expected<EmbedBitcodeOptions> parseEmbedBitcodePassOptions(StringRef Params) {
+  EmbedBitcodeOptions Result;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+
+    if (ParamName == "thinlto") {
+      Result.IsThinLTO = true;
+    } else if (ParamName == "emit-summary") {
+      Result.EmitLTOSummary = true;
+    } else {
+      return make_error<StringError>(
+          formatv("invalid EmbedBitcode pass parameter '{0}' ", ParamName)
+              .str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Result;
+}
+
 Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
   MemorySanitizerOptions Result;
   while (!Params.empty()) {
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 525b83dee79b0dd..fea9dfe711f8d07 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1531,22 +1531,31 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
 }
 
 ModulePassManager
-PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level) {
+PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
+                                        bool EmitSummary) {
   ModulePassManager MPM;
-  // FatLTO always uses UnifiedLTO, so use the ThinLTOPreLink pipeline
-  MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level));
-  MPM.addPass(EmbedBitcodePass());
-
-  // Use the ThinLTO post-link pipeline with sample profiling, other
-  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
-    MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
-  else {
-    // otherwise, just use module optimization
-    MPM.addPass(
-        buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
-    // Emit annotation remarks.
-    addAnnotationRemarksPass(MPM);
+  if (ThinLTO)
+    MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level));
+  else
+    MPM.addPass(buildLTOPreLinkDefaultPipeline(Level));
+  MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
+
+  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
+    if (ThinLTO) {
+      // Use the ThinLTO post-link pipeline with sample profiling
+      MPM.addPass(
+          buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
+      return MPM;
+    }
+    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
+                                        PGOOpt->ProfileRemappingFile,
+                                        ThinOrFullLTOPhase::FullLTOPostLink));
   }
+  MPM.addPass(buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
+
+  // Emit annotation remarks.
+  addAnnotationRemarksPass(MPM);
+
   return MPM;
 }
 
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 5f2530209112417..e59795c7b0840ef 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -58,7 +58,6 @@ MODULE_PASS("dfsan", DataFlowSanitizerPass())
 MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
 MODULE_PASS("dxil-upgrade", DXILUpgradePass())
 MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
-MODULE_PASS("embed-bitcode", EmbedBitcodePass())
 MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
 MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
 MODULE_PASS("function-import", FunctionImportPass())
@@ -161,6 +160,10 @@ MODULE_PASS_WITH_PARAMS(
     "group-by-use;ignore-single-use;max-offset=N;merge-const;merge-external;"
     "no-group-by-use;no-ignore-single-use;no-merge-const;no-merge-external;"
     "size-only")
+MODULE_PASS_WITH_PARAMS(
+    "embed-bitcode", "EmbedBitcodePass",
+    [](EmbedBitcodeOptions Opts) { return EmbedBitcodePass(Opts); },
+    parseEmbedBitcodePassOptions, "thinlto;emit-summary")
 MODULE_PASS_WITH_PARAMS(
     "globaldce", "GlobalDCEPass",
     [](bool InLTOPostLink) { return GlobalDCEPass(InLTOPostLink); },
diff --git a/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp b/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
index 48ef0772e800e72..6af3a45701bc51c 100644
--- a/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
+++ b/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Bitcode/BitcodeWriterPass.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -16,6 +18,7 @@
 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
+#include <memory>
 #include <string>
 
 using namespace llvm;
@@ -30,9 +33,16 @@ PreservedAnalyses EmbedBitcodePass::run(Module &M, ModuleAnalysisManager &AM) {
     report_fatal_error(
         "EmbedBitcode pass currently only supports ELF object format",
         /*gen_crash_diag=*/false);
+
   std::string Data;
   raw_string_ostream OS(Data);
-  ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr).run(M, AM);
+  if (IsThinLTO)
+    ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr).run(M, AM);
+  else
+    BitcodeWriterPass(OS, /*ShouldPreserveUseListOrder=*/false, EmitLTOSummary)
+        .run(M, AM);
+
   embedBufferInModule(M, MemoryBufferRef(Data, "ModuleData"), ".llvm.lto");
+
   return PreservedAnalyses::all();
 }
diff --git a/llvm/test/CodeGen/X86/fat-lto-section.ll b/llvm/test/CodeGen/X86/fat-lto-section.ll
index 9a4359bab6b5ddc..30c56229a0e2a31 100644
--- a/llvm/test/CodeGen/X86/fat-lto-section.ll
+++ b/llvm/test/CodeGen/X86/fat-lto-section.ll
@@ -1,5 +1,5 @@
 ;; Ensure that the .llvm.lto section has SHT_EXCLUDE set.
-; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode" -S \
+; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode<thinlto;emit-summary>" -S \
 ; RUN:   | llc --mtriple x86_64-unknown-linux-gnu -filetype=obj \
 ; RUN:   | llvm-readelf - --sections \
 ; RUN:   | FileCheck %s --check-prefix=EXCLUDE
diff --git a/llvm/test/Transforms/EmbedBitcode/embed.ll b/llvm/test/Transforms/EmbedBitcode/embed.ll
index 734bf5274a5f2e5..dffb5cf7554772a 100644
--- a/llvm/test/Transforms/EmbedBitcode/embed.ll
+++ b/llvm/test/Transforms/EmbedBitcode/embed.ll
@@ -1,4 +1,7 @@
 ; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode" -S | FileCheck %s
+; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode<thinlto>" -S | FileCheck %s
+; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode<emit-summary>" -S | FileCheck %s
+; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode<thinlto;emit-summary>" -S | FileCheck %s
 
 @a = global i32 1
 

>From 7a52c8cade1916893b3cf7e3ae6824b076afe256 Mon Sep 17 00:00:00 2001
From: Paul Kirth <pk1574 at gmail.com>
Date: Mon, 22 Jan 2024 23:27:06 +0000
Subject: [PATCH 2/4] Adress comments and fix doc string

Created using spr 1.3.4
---
 clang/lib/CodeGen/BackendUtil.cpp        |  1 -
 llvm/include/llvm/Passes/PassBuilder.h   |  6 +-----
 llvm/lib/Passes/PassBuilderPipelines.cpp | 24 +++++++++---------------
 3 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 9369c8828231421..ec203f6f28bc173 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1047,7 +1047,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
         MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
                                     /*EmitLTOSummary=*/true));
       }
-
     } else {
       // Emit a module summary by default for Regular LTO except for ld64
       // targets
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index bb1d9cd82228879..7339b8a988232d8 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -250,11 +250,7 @@ class PassBuilder {
   ///
   /// This builds a pipeline that runs the LTO/ThinLTO  pre-link pipeline, and
   /// emits a section containing the pre-link bitcode along side the object code
-  /// generated by running the PerModuleDefaultPipeline, used when compiling
-  /// without LTO. It clones the module and runs the LTO/non-LTO pipelines
-  /// separately to avoid any inconsistencies with an ad-hoc pipeline that tries
-  /// to approximate the PerModuleDefaultPipeline from the pre-link LTO
-  /// pipelines.
+  /// generated in non-LTO compilation.
   ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level,
                                                bool ThinLTO, bool EmitSummary);
 
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index fea9dfe711f8d07..6ede86382912066 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1540,22 +1540,16 @@ PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
     MPM.addPass(buildLTOPreLinkDefaultPipeline(Level));
   MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
 
-  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
-    if (ThinLTO) {
-      // Use the ThinLTO post-link pipeline with sample profiling
-      MPM.addPass(
-          buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
-      return MPM;
-    }
-    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
-                                        PGOOpt->ProfileRemappingFile,
-                                        ThinOrFullLTOPhase::FullLTOPostLink));
+  // Use the ThinLTO post-link pipeline with sample profiling
+  if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
+    MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
+  else {
+    // otherwise, just use module optimization
+    MPM.addPass(
+        buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
+    // Emit annotation remarks.
+    addAnnotationRemarksPass(MPM);
   }
-  MPM.addPass(buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
-
-  // Emit annotation remarks.
-  addAnnotationRemarksPass(MPM);
-
   return MPM;
 }
 

>From 43443c192133b4251eeff40e607251f5a23d82b3 Mon Sep 17 00:00:00 2001
From: Paul Kirth <pk1574 at gmail.com>
Date: Tue, 23 Jan 2024 01:32:15 +0000
Subject: [PATCH 3/4] Fix clang test file & restore deleted check lines

Created using spr 1.3.4
---
 clang/test/CodeGen/fat-lto-objects.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/fat-lto-objects.c b/clang/test/CodeGen/fat-lto-objects.c
index e3674d51c7f442b..afce798c5c81945 100644
--- a/clang/test/CodeGen/fat-lto-objects.c
+++ b/clang/test/CodeGen/fat-lto-objects.c
@@ -34,11 +34,13 @@
 // RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -fsplit-lto-unit -S < %s -o - \
 // RUN: | FileCheck %s --check-prefixes=ASM
 
-<<<<<<< HEAD
 /// Be sure we enable split LTO units correctly under -ffat-lto-objects.
 //   SPLIT: ![[#]] = !{i32 1, !"EnableSplitLTOUnit", i32 1}
 // NOSPLIT: ![[#]] = !{i32 1, !"EnableSplitLTOUnit", i32 0}
 
+// FULL-NOT: ![[#]] = !{i32 1, !"ThinLTO", i32 0}
+// THIN-NOT: ![[#]] = !{i32 1, !"ThinLTO", i32 0}
+
 // UNIFIED: ![[#]] = !{i32 1, !"UnifiedLTO", i32 1}
 // NOUNIFIED-NOT: ![[#]] = !{i32 1, !"UnifiedLTO", i32 1}
 

>From 89f632c30b517c21fbc80b2a660bed4f3665acff Mon Sep 17 00:00:00 2001
From: Paul Kirth <pk1574 at gmail.com>
Date: Tue, 23 Jan 2024 18:18:37 +0000
Subject: [PATCH 4/4] Fix remaining -unified-lto reference in test

Created using spr 1.3.4
---
 clang/test/Driver/fat-lto-objects.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/test/Driver/fat-lto-objects.c b/clang/test/Driver/fat-lto-objects.c
index 1fa2f00a19e62b5..97002db6edc51e5 100644
--- a/clang/test/Driver/fat-lto-objects.c
+++ b/clang/test/Driver/fat-lto-objects.c
@@ -20,14 +20,12 @@
 /// When fat LTO is enabled with -S and -emit-llvm, we expect IR output and -ffat-lto-objects to be passed to cc1.
 // RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -S -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-CC-S-EL-LTO
 // CHECK-CC-S-EL-LTO: -cc1
-// CHECK-CC-S-EL-LTO-SAME: -funified-lto
 // CHECK-CC-S-EL-LTO-SAME: -emit-llvm
 // CHECK-CC-S-EL-LTO-SAME: -ffat-lto-objects
 
 /// When fat LTO is enabled wihtout -S we expect native object output and -ffat-lto-object to be passed to cc1.
 // RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -c 2>&1 | FileCheck %s -check-prefix=CHECK-CC-C-LTO
 // CHECK-CC-C-LTO: -cc1
-// CHECK-CC-C-LTO: -funified-lto
 // CHECK-CC-C-LTO: -emit-obj
 // CHECK-CC-C-LTO: -ffat-lto-objects
 



More information about the flang-commits mailing list