[llvm] [clang] [clang][llvm][fatlto] Avoid cloning modules in FatLTO (PR #72180)

Paul Kirth via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 30 14:24:54 PST 2023


https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/72180

>From 6d971446fac3c65b1e7d48a7c9277b35133460ff Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Mon, 13 Nov 2023 23:54:51 +0000
Subject: [PATCH 1/8] [clang][llvm][fatlto] Avoid cloning modules in FatLTO

https://github.com/llvm/llvm-project/issues/70703 pointed out that
cloning LLVM modules could lead to miscompiles when using FatLTO.

This is due to an existing issue when cloning modules with labels
(see #55991 and #47769). Since this can lead to miscompilation,
we can avoid cloning the LLVM modules, which was desirable anyway.

This patch modifies the EmbedBitcodePass to no longer clone the module
or run an input pipeline over it. Further, it make FatLTO always perform
UnifiedLTO, so we can still defer the Thin/Full LTO decision to
link-time. Lastly, it removes dead/obsolete code related to now defunct
options that do not work with the EmbedBitcodePass implementation any
longer.
---
 clang/lib/CodeGen/BackendUtil.cpp             |  9 +++--
 clang/lib/Driver/ToolChains/Clang.cpp         |  4 ++-
 clang/test/CodeGen/fat-lto-objects.c          | 10 +++---
 llvm/docs/FatLTO.rst                          | 35 ++++++++++---------
 llvm/include/llvm/Passes/PassBuilder.h        |  3 +-
 .../llvm/Transforms/IPO/EmbedBitcodePass.h    | 17 +--------
 llvm/lib/Passes/PassBuilder.cpp               | 20 -----------
 llvm/lib/Passes/PassBuilderPipelines.cpp      | 11 +++---
 llvm/lib/Passes/PassRegistry.def              |  5 +--
 llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp  | 16 +--------
 llvm/test/CodeGen/X86/fat-lto-section.ll      |  2 +-
 llvm/test/Transforms/EmbedBitcode/embed.ll    |  3 --
 12 files changed, 39 insertions(+), 96 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index f01a6514f6effb0..724b0fc1d7c0a5d 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -810,7 +810,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   // Only enable CGProfilePass when using integrated assembler, since
   // non-integrated assemblers don't recognize .cgprofile section.
   PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
-  PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO;
+  PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO || CodeGenOpts.FatLTO;
 
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;
@@ -996,9 +996,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
     }
 
     if (CodeGenOpts.FatLTO) {
-      MPM.addPass(PB.buildFatLTODefaultPipeline(
-          Level, PrepareForThinLTO,
-          PrepareForThinLTO || shouldEmitRegularLTOSummary()));
+      MPM.addPass(PB.buildFatLTODefaultPipeline(Level));
     } else if (PrepareForThinLTO) {
       MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(Level));
     } else if (PrepareForLTO) {
@@ -1073,7 +1071,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
     if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
       TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
                                uint32_t(CodeGenOpts.EnableSplitLTOUnit));
-    if (CodeGenOpts.UnifiedLTO && !TheModule->getModuleFlag("UnifiedLTO"))
+    // FatLTO always means UnifiedLTO
+    if (!TheModule->getModuleFlag("UnifiedLTO"))
       TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
   }
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 3c8df8a9037d695..b1ce95be38f88d5 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4862,7 +4862,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   bool UnifiedLTO = false;
   if (IsUsingLTO) {
     UnifiedLTO = Args.hasFlag(options::OPT_funified_lto,
-                              options::OPT_fno_unified_lto, Triple.isPS());
+                              options::OPT_fno_unified_lto, Triple.isPS()) ||
+                 Args.hasFlag(options::OPT_ffat_lto_objects,
+                              options::OPT_fno_fat_lto_objects, false);
     if (UnifiedLTO)
       CmdArgs.push_back("-funified-lto");
   }
diff --git a/clang/test/CodeGen/fat-lto-objects.c b/clang/test/CodeGen/fat-lto-objects.c
index 2c3a4ef9c615529..6085762fa5a2467 100644
--- a/clang/test/CodeGen/fat-lto-objects.c
+++ b/clang/test/CodeGen/fat-lto-objects.c
@@ -9,22 +9,22 @@
 // RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -fsplit-lto-unit -emit-obj < %s -o %t.full.split.o
 // RUN: llvm-readelf -S %t.full.split.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.full.split.bc %t.full.split.o
-// RUN: llvm-dis %t.full.split.bc -o - | FileCheck %s --check-prefixes=FULL,SPLIT,NOUNIFIED
+// RUN: llvm-dis %t.full.split.bc -o - | FileCheck %s --check-prefixes=FULL,SPLIT,UNIFIED
 
 // RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -emit-obj < %s -o %t.full.nosplit.o
 // RUN: llvm-readelf -S %t.full.nosplit.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.full.nosplit.bc %t.full.nosplit.o
-// RUN: llvm-dis %t.full.nosplit.bc -o - | FileCheck %s --check-prefixes=FULL,NOSPLIT,NOUNIFIED
+// RUN: llvm-dis %t.full.nosplit.bc -o - | FileCheck %s --check-prefixes=FULL,NOSPLIT,UNIFIED
 
 // RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -fsplit-lto-unit -ffat-lto-objects -emit-obj < %s -o %t.thin.split.o
 // RUN: llvm-readelf -S %t.thin.split.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.thin.split.bc %t.thin.split.o
-// RUN: llvm-dis %t.thin.split.bc -o - | FileCheck %s --check-prefixes=THIN,SPLIT,NOUNIFIED
+// RUN: llvm-dis %t.thin.split.bc -o - | FileCheck %s --check-prefixes=THIN,SPLIT,UNIFIED
 
 // RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -ffat-lto-objects -emit-obj < %s -o %t.thin.nosplit.o
 // RUN: llvm-readelf -S %t.thin.nosplit.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.thin.nosplit.bc %t.thin.nosplit.o
-// RUN: llvm-dis %t.thin.nosplit.bc -o -  | FileCheck %s --check-prefixes=THIN,NOSPLIT,NOUNIFIED
+// RUN: llvm-dis %t.thin.nosplit.bc -o -  | FileCheck %s --check-prefixes=THIN,NOSPLIT,UNIFIED
 
 // RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -funified-lto -ffat-lto-objects -emit-obj < %s -o %t.unified.o
 // RUN: llvm-readelf -S %t.unified.o | FileCheck %s --check-prefixes=ELF
@@ -42,8 +42,8 @@
 // SPLIT: ![[#]] = !{i32 1, !"EnableSplitLTOUnit", i32 1}
 // NOSPLIT: ![[#]] = !{i32 1, !"EnableSplitLTOUnit", i32 0}
 
+/// FatLTO always uses UnifiedLTO
 // UNIFIED: ![[#]] = !{i32 1, !"UnifiedLTO", i32 1}
-// NOUNIFIED-NOT: ![[#]] = !{i32 1, !"UnifiedLTO", i32 1}
 
 // ELF: .llvm.lto
 
diff --git a/llvm/docs/FatLTO.rst b/llvm/docs/FatLTO.rst
index b505bb2a96fe160..0e424f694b1bf09 100644
--- a/llvm/docs/FatLTO.rst
+++ b/llvm/docs/FatLTO.rst
@@ -29,30 +29,31 @@ Overview
 Within LLVM, FatLTO is supported by choosing the ``FatLTODefaultPipeline``.
 This pipeline will:
 
-#) Clone the IR module.
-#) Run the pre-link (Thin)LTO pipeline using the cloned module.
+#) Run the pre-link UnifiedLTO pipeline on the current module.
 #) Embed the pre-link bitcode in a special ``.llvm.lto`` section.
-#) Optimize the unmodified copy of the module using the normal compilation pipeline.
+#) Finish optimizing the module using the post-link ThinLTO pipeline.
 #) Emit the object file, including the new ``.llvm.lto`` section.
 
 .. NOTE
 
-   At the time of writing, we conservatively run independent pipelines to
-   generate the bitcode section and the object code, which happen to be
-   identical to those used outside of FatLTO. This results in  compiled
-   artifacts that are identical to those produced by the default and (Thin)LTO
-   pipelines. However, this is not a guarantee, and we reserve the right to
-   change this at any time. Explicitly, users should not rely on the produced
-   bitcode or object code to mach their non-LTO counterparts precisely. They
-   will exhibit similar performance characteristics, but may not be bit-for-bit
-   the same.
+   Previously, we conservatively ran independent pipelines on separate copies
+   of the LLVM module to generate the bitcode section and the object code,
+   which happen to be identical to those used outside of FatLTO. While that
+   resulted in  compiled artifacts that were identical to those produced by the
+   default and (Thin)LTO pipelines, module cloning led to some cases of
+   miscompilation, and we have moved away from trying to keep bitcode
+   generation and optimization completely disjoint.
+
+   Bit-for-bit compatibility is not (and never was) a guarantee, and we reserve
+   the right to change this at any time. Explicitly, users should not rely on
+   the produced bitcode or object code to mach their non-LTO counterparts
+   precisely. They will exhibit similar performance characteristics, but may
+   not be bit-for-bit the same.
 
 Internally, the ``.llvm.lto`` section is created by running the
-``EmbedBitcodePass`` at the start of the ``PerModuleDefaultPipeline``. This
-pass is responsible for cloning and optimizing the module with the appropriate
-LTO pipeline and emitting the ``.llvm.lto`` section. Afterwards, the
-``PerModuleDefaultPipeline`` runs normally and the compiler can emit the fat
-object file.
+``EmbedBitcodePass`` after the ``ThinLTOPreLinkDefaultPipeline``. This pass is
+responsible for emitting the ``.llvm.lto`` section. Afterwards, the
+``ThinLTODefaultPipeline`` runs and the compiler can emit the fat object file.
 
 Limitations
 ===========
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 23bc891a8f1e97c..19ac90842bcb08d 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -246,8 +246,7 @@ class PassBuilder {
   /// separately to avoid any inconsistencies with an ad-hoc pipeline that tries
   /// to approximate the PerModuleDefaultPipeline from the pre-link LTO
   /// pipelines.
-  ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level,
-                                               bool ThinLTO, bool EmitSummary);
+  ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level);
 
   /// Build a pre-link, ThinLTO-targeting default optimization pipeline to
   /// a pass manager.
diff --git a/llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h b/llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h
index f323c61483fd30a..c35048c91aba207 100644
--- a/llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h
+++ b/llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h
@@ -25,28 +25,13 @@ class Module;
 class ModulePass;
 class Pass;
 
-struct EmbedBitcodeOptions {
-  EmbedBitcodeOptions() : EmbedBitcodeOptions(false, false) {}
-  EmbedBitcodeOptions(bool IsThinLTO, bool EmitLTOSummary)
-      : IsThinLTO(IsThinLTO), EmitLTOSummary(EmitLTOSummary) {}
-  bool IsThinLTO;
-  bool EmitLTOSummary;
-};
-
 /// Pass embeds a copy of the module optimized with the provided pass pipeline
 /// into a global variable.
 class EmbedBitcodePass : public PassInfoMixin<EmbedBitcodePass> {
-  bool IsThinLTO;
-  bool EmitLTOSummary;
   ModulePassManager MPM;
 
 public:
-  EmbedBitcodePass(EmbedBitcodeOptions Opts)
-      : EmbedBitcodePass(Opts.IsThinLTO, Opts.EmitLTOSummary,
-                         ModulePassManager()) {}
-  EmbedBitcodePass(bool IsThinLTO, bool EmitLTOSummary, ModulePassManager &&MPM)
-      : IsThinLTO(IsThinLTO), EmitLTOSummary(EmitLTOSummary),
-        MPM(std::move(MPM)) {}
+  EmbedBitcodePass() {}
 
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
 
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index aeb9726a186b51e..98a679177c23dfe 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -759,26 +759,6 @@ Expected<HWAddressSanitizerOptions> parseHWASanPassOptions(StringRef Params) {
   return Result;
 }
 
-Expected<EmbedBitcodeOptions> parseEmbedBitcodePassOptions(StringRef Params) {
-  EmbedBitcodeOptions Result;
-  while (!Params.empty()) {
-    StringRef ParamName;
-    std::tie(ParamName, Params) = Params.split(';');
-
-    if (ParamName == "thinlto") {
-      Result.IsThinLTO = true;
-    } else if (ParamName == "emit-summary") {
-      Result.EmitLTOSummary = true;
-    } else {
-      return make_error<StringError>(
-          formatv("invalid EmbedBitcode pass parameter '{0}' ", ParamName)
-              .str(),
-          inconvertibleErrorCode());
-    }
-  }
-  return Result;
-}
-
 Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
   MemorySanitizerOptions Result;
   while (!Params.empty()) {
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index f3d280316e04077..80c191f880087c5 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1530,14 +1530,11 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
 }
 
 ModulePassManager
-PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
-                                        bool EmitSummary) {
+PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level) {
   ModulePassManager MPM;
-  MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary,
-                               ThinLTO
-                                   ? buildThinLTOPreLinkDefaultPipeline(Level)
-                                   : buildLTOPreLinkDefaultPipeline(Level)));
-  MPM.addPass(buildPerModuleDefaultPipeline(Level));
+  MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level));
+  MPM.addPass(EmbedBitcodePass());
+  MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
   return MPM;
 }
 
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 1f1bc3222468b8a..2abc2920264a673 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -58,6 +58,7 @@ MODULE_PASS("dfsan", DataFlowSanitizerPass())
 MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
 MODULE_PASS("dxil-upgrade", DXILUpgradePass())
 MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
+MODULE_PASS("embed-bitcode", EmbedBitcodePass())
 MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
 MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
 MODULE_PASS("function-import", FunctionImportPass())
@@ -145,10 +146,6 @@ MODULE_PASS_WITH_PARAMS(
     "asan", "AddressSanitizerPass",
     [](AddressSanitizerOptions Opts) { return AddressSanitizerPass(Opts); },
     parseASanPassOptions, "kernel")
-MODULE_PASS_WITH_PARAMS(
-    "embed-bitcode", "EmbedBitcodePass",
-    [](EmbedBitcodeOptions Opts) { return EmbedBitcodePass(Opts); },
-    parseEmbedBitcodePassOptions, "thinlto;emit-summary")
 MODULE_PASS_WITH_PARAMS(
     "globaldce", "GlobalDCEPass",
     [](bool InLTOPostLink) { return GlobalDCEPass(InLTOPostLink); },
diff --git a/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp b/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
index fa56a5b564ae668..48ef0772e800e72 100644
--- a/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
+++ b/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
@@ -7,8 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/EmbedBitcodePass.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/Bitcode/BitcodeWriterPass.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -16,10 +14,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
-#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
-#include <memory>
 #include <string>
 
 using namespace llvm;
@@ -34,19 +30,9 @@ PreservedAnalyses EmbedBitcodePass::run(Module &M, ModuleAnalysisManager &AM) {
     report_fatal_error(
         "EmbedBitcode pass currently only supports ELF object format",
         /*gen_crash_diag=*/false);
-
-  std::unique_ptr<Module> NewModule = CloneModule(M);
-  MPM.run(*NewModule, AM);
-
   std::string Data;
   raw_string_ostream OS(Data);
-  if (IsThinLTO)
-    ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr).run(*NewModule, AM);
-  else
-    BitcodeWriterPass(OS, /*ShouldPreserveUseListOrder=*/false, EmitLTOSummary)
-        .run(*NewModule, AM);
-
+  ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr).run(M, AM);
   embedBufferInModule(M, MemoryBufferRef(Data, "ModuleData"), ".llvm.lto");
-
   return PreservedAnalyses::all();
 }
diff --git a/llvm/test/CodeGen/X86/fat-lto-section.ll b/llvm/test/CodeGen/X86/fat-lto-section.ll
index 30c56229a0e2a31..9a4359bab6b5ddc 100644
--- a/llvm/test/CodeGen/X86/fat-lto-section.ll
+++ b/llvm/test/CodeGen/X86/fat-lto-section.ll
@@ -1,5 +1,5 @@
 ;; Ensure that the .llvm.lto section has SHT_EXCLUDE set.
-; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode<thinlto;emit-summary>" -S \
+; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode" -S \
 ; RUN:   | llc --mtriple x86_64-unknown-linux-gnu -filetype=obj \
 ; RUN:   | llvm-readelf - --sections \
 ; RUN:   | FileCheck %s --check-prefix=EXCLUDE
diff --git a/llvm/test/Transforms/EmbedBitcode/embed.ll b/llvm/test/Transforms/EmbedBitcode/embed.ll
index dffb5cf7554772a..734bf5274a5f2e5 100644
--- a/llvm/test/Transforms/EmbedBitcode/embed.ll
+++ b/llvm/test/Transforms/EmbedBitcode/embed.ll
@@ -1,7 +1,4 @@
 ; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode" -S | FileCheck %s
-; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode<thinlto>" -S | FileCheck %s
-; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode<emit-summary>" -S | FileCheck %s
-; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode<thinlto;emit-summary>" -S | FileCheck %s
 
 @a = global i32 1
 

>From 61a55fae1064cff715914f5c7a5f9f323be92d89 Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Tue, 28 Nov 2023 00:38:29 +0000
Subject: [PATCH 2/8] fixup! Properly handle CC1 options, and update tests

---
 clang/lib/CodeGen/BackendUtil.cpp         |  4 +--
 clang/lib/Frontend/CompilerInvocation.cpp |  7 ++++
 clang/test/CodeGen/fat-lto-objects.c      | 44 +++++++++++------------
 clang/test/Driver/fat-lto-objects.c       |  2 ++
 4 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 724b0fc1d7c0a5d..8c666e2cb463c6c 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -810,7 +810,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   // Only enable CGProfilePass when using integrated assembler, since
   // non-integrated assemblers don't recognize .cgprofile section.
   PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
-  PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO || CodeGenOpts.FatLTO;
+  PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO;
 
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;
@@ -996,6 +996,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
     }
 
     if (CodeGenOpts.FatLTO) {
+      assert(CodeGenOpts.UnifiedLTO && "FatLTO requires UnifiedLTO");
       MPM.addPass(PB.buildFatLTODefaultPipeline(Level));
     } else if (PrepareForThinLTO) {
       MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(Level));
@@ -1040,7 +1041,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
         MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
                                     /*EmitLTOSummary=*/true));
       }
-
     } else {
       // Emit a module summary by default for Regular LTO except for ld64
       // targets
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 5a8e4cf9843de2b..9a8648c04d23f3d 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1861,6 +1861,13 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
     if (Args.hasArg(OPT_funified_lto))
       Opts.PrepareForThinLTO = true;
   }
+  if (Arg *A = Args.getLastArg(options::OPT_ffat_lto_objects,
+                               options::OPT_fno_fat_lto_objects)) {
+    if (!Args.hasArg(OPT_funified_lto))
+      Diags.Report(diag::err_drv_incompatible_options)
+          << A->getAsString(Args) << "-fno-unified-lto";
+  }
+
   if (Arg *A = Args.getLastArg(OPT_fthinlto_index_EQ)) {
     if (IK.getLanguage() != Language::LLVM_IR)
       Diags.Report(diag::err_drv_argument_only_allowed_with)
diff --git a/clang/test/CodeGen/fat-lto-objects.c b/clang/test/CodeGen/fat-lto-objects.c
index 6085762fa5a2467..50a5fc3acb0ba72 100644
--- a/clang/test/CodeGen/fat-lto-objects.c
+++ b/clang/test/CodeGen/fat-lto-objects.c
@@ -1,49 +1,47 @@
 // REQUIRES: x86-registered-target
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -fsplit-lto-unit -emit-llvm < %s  | FileCheck %s --check-prefixes=FULL,SPLIT
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=FULL,SPLIT
+// RUN: not %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -fsplit-lto-unit -emit-llvm < %s 2>&1 | FileCheck %s --check-prefixes=NO-UNIFIED
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -fsplit-lto-unit -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=THIN,SPLIT
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=THIN,NOSPLIT
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -funified-lto -ffat-lto-objects -fsplit-lto-unit -emit-llvm < %s  | FileCheck %s --check-prefixes=FULL,SPLIT,UNIFIED
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -funified-lto -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=FULL,NOSPLIT,UNIFIED
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -fsplit-lto-unit -emit-obj < %s -o %t.full.split.o
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -funified-lto -fsplit-lto-unit -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=THIN,SPLIT,UNIFIED
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -funified-lto -ffat-lto-objects -emit-llvm < %s  | FileCheck %s --check-prefixes=THIN,NOSPLIT,UNIFIED
+
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -funified-lto -ffat-lto-objects -fsplit-lto-unit -emit-obj < %s -o %t.full.split.o
 // RUN: llvm-readelf -S %t.full.split.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.full.split.bc %t.full.split.o
-// RUN: llvm-dis %t.full.split.bc -o - | FileCheck %s --check-prefixes=FULL,SPLIT,UNIFIED
+// RUN: llvm-dis %t.full.split.bc -o - | FileCheck %s --check-prefixes=THIN,SPLIT,UNIFIED
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -emit-obj < %s -o %t.full.nosplit.o
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -funified-lto -ffat-lto-objects -emit-obj < %s -o %t.full.nosplit.o
 // RUN: llvm-readelf -S %t.full.nosplit.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.full.nosplit.bc %t.full.nosplit.o
-// RUN: llvm-dis %t.full.nosplit.bc -o - | FileCheck %s --check-prefixes=FULL,NOSPLIT,UNIFIED
+// RUN: llvm-dis %t.full.nosplit.bc -o - | FileCheck %s --check-prefixes=THIN,NOSPLIT,UNIFIED
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -fsplit-lto-unit -ffat-lto-objects -emit-obj < %s -o %t.thin.split.o
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -funified-lto -fsplit-lto-unit -ffat-lto-objects -emit-obj < %s -o %t.thin.split.o
 // RUN: llvm-readelf -S %t.thin.split.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.thin.split.bc %t.thin.split.o
 // RUN: llvm-dis %t.thin.split.bc -o - | FileCheck %s --check-prefixes=THIN,SPLIT,UNIFIED
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -ffat-lto-objects -emit-obj < %s -o %t.thin.nosplit.o
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -funified-lto -ffat-lto-objects -emit-obj < %s -o %t.thin.nosplit.o
 // RUN: llvm-readelf -S %t.thin.nosplit.o | FileCheck %s --check-prefixes=ELF
 // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.thin.nosplit.bc %t.thin.nosplit.o
 // RUN: llvm-dis %t.thin.nosplit.bc -o -  | FileCheck %s --check-prefixes=THIN,NOSPLIT,UNIFIED
 
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -funified-lto -ffat-lto-objects -emit-obj < %s -o %t.unified.o
-// RUN: llvm-readelf -S %t.unified.o | FileCheck %s --check-prefixes=ELF
-// RUN: llvm-objcopy --dump-section=.llvm.lto=%t.unified.bc %t.unified.o
-// RUN: llvm-dis %t.unified.bc -o - | FileCheck %s --check-prefixes=THIN,NOSPLIT,UNIFIED
-
-// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -fsplit-lto-unit -S < %s -o - \
+// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -funified-lto -ffat-lto-objects -fsplit-lto-unit -S < %s -o - \
 // RUN: | FileCheck %s --check-prefixes=ASM
 
-/// Check that the ThinLTO metadata is only set false for full LTO.
-// FULL: ![[#]] = !{i32 1, !"ThinLTO", i32 0}
-// THIN-NOT: ![[#]] = !{i32 1, !"ThinLTO", i32 0}
-
 /// Be sure we enable split LTO units correctly under -ffat-lto-objects.
-// SPLIT: ![[#]] = !{i32 1, !"EnableSplitLTOUnit", i32 1}
+//   SPLIT: ![[#]] = !{i32 1, !"EnableSplitLTOUnit", i32 1}
 // NOSPLIT: ![[#]] = !{i32 1, !"EnableSplitLTOUnit", i32 0}
 
-/// FatLTO always uses UnifiedLTO
-// UNIFIED: ![[#]] = !{i32 1, !"UnifiedLTO", i32 1}
+/// Check that the ThinLTO metadata is set true for both full and thin LTO, since FatLTO is based on UnifiedLTO.
+//     FULL: ![[#]] = !{i32 1, !"ThinLTO", i32 1}
+// THIN-NOT: ![[#]] = !{i32 1, !"ThinLTO", i32 0}
+
+/// FatLTO always uses UnifiedLTO. It's an error if they aren't set together
+//    UNIFIED: ![[#]] = !{i32 1, !"UnifiedLTO", i32 1}
+// NO-UNIFIED: error: the combination of '-ffat-lto-objects' and '-fno-unified-lto' is incompatible 
 
 // ELF: .llvm.lto
 
diff --git a/clang/test/Driver/fat-lto-objects.c b/clang/test/Driver/fat-lto-objects.c
index 887c33fa76d71f5..84f9416b163b7ad 100644
--- a/clang/test/Driver/fat-lto-objects.c
+++ b/clang/test/Driver/fat-lto-objects.c
@@ -1,5 +1,6 @@
 // RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -c 2>&1 | FileCheck %s -check-prefix=CHECK-CC
 // CHECK-CC: -cc1
+// CHECK-CC-SAME: -funified-lto
 // CHECK-CC-SAME: -emit-obj
 // CHECK-CC-SAME: -ffat-lto-objects
 
@@ -15,6 +16,7 @@
 // RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -S 2>&1 | FileCheck %s -check-prefix=CHECK-CC-S-LTO
 // RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -S -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-CC-S-LTO
 // CHECK-CC-S-LTO: -cc1
+// CHECK-CC-S-LTO-SAME: -funified-lto
 // CHECK-CC-S-LTO-SAME: -emit-llvm
 // CHECK-CC-S-LTO-SAME: -ffat-lto-objects
 

>From a8b2fe615a081280fa758007ac664594c8ba9095 Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Tue, 28 Nov 2023 02:05:04 +0000
Subject: [PATCH 3/8] fixup! Make error reporting more useful

---
 clang/lib/Frontend/CompilerInvocation.cpp | 13 ++++++++++---
 clang/test/CodeGen/fat-lto-objects.c      |  2 +-
 clang/test/Driver/fat-lto-objects.c       | 10 ++++++++++
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 9a8648c04d23f3d..a6188cb45e1787f 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1863,9 +1863,16 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
   }
   if (Arg *A = Args.getLastArg(options::OPT_ffat_lto_objects,
                                options::OPT_fno_fat_lto_objects)) {
-    if (!Args.hasArg(OPT_funified_lto))
-      Diags.Report(diag::err_drv_incompatible_options)
-          << A->getAsString(Args) << "-fno-unified-lto";
+    if (A->getOption().matches(options::OPT_ffat_lto_objects)) {
+      if (Arg *Uni = Args.getLastArg(options::OPT_funified_lto,
+                                     options::OPT_fno_unified_lto)) {
+        if (Uni->getOption().matches(options::OPT_fno_unified_lto))
+          Diags.Report(diag::err_drv_incompatible_options)
+              << A->getAsString(Args) << "-fno-unified-lto";
+      } else
+        Diags.Report(diag::err_drv_argument_only_allowed_with)
+            << A->getAsString(Args) << "-funified-lto";
+    }
   }
 
   if (Arg *A = Args.getLastArg(OPT_fthinlto_index_EQ)) {
diff --git a/clang/test/CodeGen/fat-lto-objects.c b/clang/test/CodeGen/fat-lto-objects.c
index 50a5fc3acb0ba72..95207e77c244cc9 100644
--- a/clang/test/CodeGen/fat-lto-objects.c
+++ b/clang/test/CodeGen/fat-lto-objects.c
@@ -41,7 +41,7 @@
 
 /// FatLTO always uses UnifiedLTO. It's an error if they aren't set together
 //    UNIFIED: ![[#]] = !{i32 1, !"UnifiedLTO", i32 1}
-// NO-UNIFIED: error: the combination of '-ffat-lto-objects' and '-fno-unified-lto' is incompatible 
+// NO-UNIFIED: error: invalid argument '-ffat-lto-objects' only allowed with '-funified-lto'
 
 // ELF: .llvm.lto
 
diff --git a/clang/test/Driver/fat-lto-objects.c b/clang/test/Driver/fat-lto-objects.c
index 84f9416b163b7ad..e02359db3f0ae0d 100644
--- a/clang/test/Driver/fat-lto-objects.c
+++ b/clang/test/Driver/fat-lto-objects.c
@@ -34,3 +34,13 @@
 // RUN:   -fuse-ld=lld -fno-lto -ffat-lto-objects -### 2>&1 | FileCheck --check-prefix=NOLTO %s
 // LTO: "--fat-lto-objects"
 // NOLTO-NOT: "--fat-lto-objects"
+
+/// Make sure that incompatible options emit the correct diagnostics, since -ffat-lto-objects requires -funified-lto
+// RUN: %clang -cc1 -triple=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -funified-lto -emit-llvm-only %s  2>&1 | FileCheck %s -check-prefix=UNIFIED --allow-empty
+// UNIFIED-NOT: error:
+
+// RUN: not %clang -cc1 -triple=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -emit-llvm-only %s  2>&1 | FileCheck %s -check-prefix=MISSING_UNIFIED
+// MISSING_UNIFIED: error: invalid argument '-ffat-lto-objects' only allowed with '-funified-lto'
+
+// RUN: not %clang -cc1 -triple=x86_64-unknown-linux-gnu -flto -fno-unified-lto -ffat-lto-objects -emit-llvm-only %s  2>&1 | FileCheck %s -check-prefix=NO-UNIFIED
+// NO-UNIFIED: error: the combination of '-ffat-lto-objects' and '-fno-unified-lto' is incompatible

>From 87aec2cdfb4784efd10d0974df95a90f4587e55d Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Tue, 28 Nov 2023 18:42:48 +0000
Subject: [PATCH 4/8] fixup! fix typo in docs

---
 llvm/docs/FatLTO.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/FatLTO.rst b/llvm/docs/FatLTO.rst
index 0e424f694b1bf09..21da24504bcdf62 100644
--- a/llvm/docs/FatLTO.rst
+++ b/llvm/docs/FatLTO.rst
@@ -46,7 +46,7 @@ This pipeline will:
 
    Bit-for-bit compatibility is not (and never was) a guarantee, and we reserve
    the right to change this at any time. Explicitly, users should not rely on
-   the produced bitcode or object code to mach their non-LTO counterparts
+   the produced bitcode or object code to match their non-LTO counterparts
    precisely. They will exhibit similar performance characteristics, but may
    not be bit-for-bit the same.
 

>From dc36d6d7f78cd93583e6520bbb0c3e694243fc7c Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Tue, 28 Nov 2023 19:09:20 +0000
Subject: [PATCH 5/8] fixup! Use ModuleOptimization over the ThinLTOPostLink
 pipeline

---
 llvm/lib/Passes/PassBuilderPipelines.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 80c191f880087c5..78add9b97ca3c08 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1532,9 +1532,13 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
 ModulePassManager
 PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level) {
   ModulePassManager MPM;
+  // FatLTO always uses UnifiedLTO, so use the ThinLTOPreLink pipeline
   MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level));
   MPM.addPass(EmbedBitcodePass());
-  MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
+  MPM.addPass(buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
+
+  // Emit annotation remarks.
+  addAnnotationRemarksPass(MPM);
   return MPM;
 }
 

>From 6946e24d5f5241c5d90f6753027e4c6e6aa25bb2 Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Tue, 28 Nov 2023 20:59:47 +0000
Subject: [PATCH 6/8] fixup! Handle sample profiling with the ThinLTO pipeline

---
 llvm/lib/Passes/PassBuilderPipelines.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 78add9b97ca3c08..e7f88680655c12b 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1535,10 +1535,17 @@ PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level) {
   // FatLTO always uses UnifiedLTO, so use the ThinLTOPreLink pipeline
   MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level));
   MPM.addPass(EmbedBitcodePass());
-  MPM.addPass(buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
 
-  // Emit annotation remarks.
-  addAnnotationRemarksPass(MPM);
+  // Use the ThinLTO post-link pipeline with sample profiling, other
+  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
+    MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
+  else {
+    // otherwise, just use module optimization
+    MPM.addPass(
+        buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
+    // Emit annotation remarks.
+    addAnnotationRemarksPass(MPM);
+  }
   return MPM;
 }
 

>From 310b46c6bba6f03ea81bd6f3a1e661f6c5ab6c8d Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Thu, 30 Nov 2023 18:48:29 +0000
Subject: [PATCH 7/8] fixup! Document limitations with SamplePGO

---
 llvm/docs/FatLTO.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llvm/docs/FatLTO.rst b/llvm/docs/FatLTO.rst
index 21da24504bcdf62..2e6a66d18847730 100644
--- a/llvm/docs/FatLTO.rst
+++ b/llvm/docs/FatLTO.rst
@@ -58,6 +58,13 @@ responsible for emitting the ``.llvm.lto`` section. Afterwards, the
 Limitations
 ===========
 
+Sample-Based PGO
+----------------
+
+If FatLTO is used together with SamplePGO (as opposed to normal
+instrumentation-based PGO), some profile-based optimizations will only be
+applied when linking with LTO.
+
 Linkers
 -------
 

>From 93ce51e8ee41055d3fa0a6f5f614d741d22c2256 Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Thu, 30 Nov 2023 19:20:50 +0000
Subject: [PATCH 8/8] fixup! Correct documentation change.

---
 llvm/docs/FatLTO.rst | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/llvm/docs/FatLTO.rst b/llvm/docs/FatLTO.rst
index 2e6a66d18847730..21da24504bcdf62 100644
--- a/llvm/docs/FatLTO.rst
+++ b/llvm/docs/FatLTO.rst
@@ -58,13 +58,6 @@ responsible for emitting the ``.llvm.lto`` section. Afterwards, the
 Limitations
 ===========
 
-Sample-Based PGO
-----------------
-
-If FatLTO is used together with SamplePGO (as opposed to normal
-instrumentation-based PGO), some profile-based optimizations will only be
-applied when linking with LTO.
-
 Linkers
 -------
 



More information about the cfe-commits mailing list