[lld] [clang] [llvm] Embed the command line arguments during LTO (PR #79390)

Duncan Ogilvie via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 24 15:23:02 PST 2024


https://github.com/mrexodia created https://github.com/llvm/llvm-project/pull/79390

This was previously marked as `FIXME` in `LTOBackend.cpp`.

List of changes:
- Removed the unused `lto::Config.MllvmArgs` field
- Add a new `lto::Config.EmbedCmdArgs` field (`\0`-separated list, since that's how the `.llvmcmd` works currently)
- Completely get rid of the separate `CmdArgs` argument from the various LTO functions (since it now uses `Conf.EmbedCmdArgs` internally instead)
- Add a new `cmdArgs` member to the `CommonLinkerContext`
- Store the `opt::ArgList` in the `commonContext()` for every driver

Potentially controversial changes:
1. `LTOBitcodeEmbedding::EmbedOptimized` previously didn't embed the command line for unknown reasons, this has been changed
2. The `embedBitcodeInModule` function now always embeds two new module flags: `embed.cmd` and `embed.cwd` (naming suggestions welcome).

For my use case I need to be able to reproduce the linking process from just the embedded bitcode file. The current working directory is necessary for this, since it is controlled by the build system and it cannot always be guessed. The original `FIXME` comment seems to agree with my use case, but the `.llvmbc` section alone definitely isn't enough (and in my opinion this does not count as doing it from just the bitcode):

> [...] the motivation for capturing post-merge bitcode and command line is replicating the compilation environment from bitcode, without needing to understand the dependencies (the functions to be imported). This assumes a clang - based invocation, case in which we have the command line.

>From 5a7c330982873c9198ef8c6f90d10f3ccb9ac5d9 Mon Sep 17 00:00:00 2001
From: Duncan Ogilvie <mr.exodia.tpodt at gmail.com>
Date: Thu, 25 Jan 2024 00:08:49 +0100
Subject: [PATCH] Embed the command line arguments during LTO

---
 clang/lib/CodeGen/BackendUtil.cpp            |  3 +-
 lld/COFF/Driver.cpp                          |  1 +
 lld/COFF/LTO.cpp                             |  3 +-
 lld/Common/CommonLinkerContext.cpp           |  9 +++++
 lld/ELF/Driver.cpp                           |  1 +
 lld/ELF/LTO.cpp                              |  3 +-
 lld/MachO/Driver.cpp                         |  1 +
 lld/MachO/LTO.cpp                            |  3 +-
 lld/MinGW/Driver.cpp                         |  1 +
 lld/include/lld/Common/CommonLinkerContext.h |  3 ++
 lld/wasm/Driver.cpp                          |  1 +
 lld/wasm/LTO.cpp                             |  2 +
 llvm/include/llvm/LTO/Config.h               |  2 +-
 llvm/include/llvm/LTO/LTOBackend.h           |  6 +--
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp    | 14 +++++++
 llvm/lib/LTO/LTO.cpp                         |  3 +-
 llvm/lib/LTO/LTOBackend.cpp                  | 39 +++++++++-----------
 llvm/lib/LTO/LTOCodeGenerator.cpp            |  3 +-
 18 files changed, 60 insertions(+), 38 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index ec203f6f28bc173..71aee18e63574e8 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1208,6 +1208,7 @@ static void runThinLTOBackend(
   Conf.CPU = TOpts.CPU;
   Conf.CodeModel = getCodeModel(CGOpts);
   Conf.MAttrs = TOpts.Features;
+  Conf.EmbedCmdArgs = CGOpts.CmdArgs;
   Conf.RelocModel = CGOpts.RelocationModel;
   std::optional<CodeGenOptLevel> OptLevelOrNone =
       CodeGenOpt::getLevel(CGOpts.OptimizationLevel);
@@ -1269,7 +1270,7 @@ static void runThinLTOBackend(
   if (Error E =
           thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
                       ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
-                      /* ModuleMap */ nullptr, CGOpts.CmdArgs)) {
+                      /* ModuleMap */ nullptr)) {
     handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
       errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
     });
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index e0afb6b18805b2e..8515aa6889fd020 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1447,6 +1447,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   // Parse command line options.
   ArgParser parser(ctx);
   opt::InputArgList args = parser.parse(argsArr);
+  commonContext().storeCmdArgs(args);
 
   // Initialize time trace profiler.
   config->timeTraceEnabled = args.hasArg(OPT_time_trace_eq);
diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index 7df931911213672..ba6c5c6b241137e 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -52,8 +52,7 @@ lto::Config BitcodeCompiler::createConfig() {
   lto::Config c;
   c.Options = initTargetOptionsFromCodeGenFlags();
   c.Options.EmitAddrsig = true;
-  for (StringRef C : ctx.config.mllvmOpts)
-    c.MllvmArgs.emplace_back(C.str());
+  c.EmbedCmdArgs = commonContext().cmdArgs;
 
   // Always emit a section per function/datum with LTO. LLVM LTO should get most
   // of the benefit of linker GC, but there are still opportunities for ICF.
diff --git a/lld/Common/CommonLinkerContext.cpp b/lld/Common/CommonLinkerContext.cpp
index 12f56bc10ec9631..57aae8fd0a703d8 100644
--- a/lld/Common/CommonLinkerContext.cpp
+++ b/lld/Common/CommonLinkerContext.cpp
@@ -37,6 +37,15 @@ CommonLinkerContext::~CommonLinkerContext() {
   lctx = nullptr;
 }
 
+void CommonLinkerContext::storeCmdArgs(const llvm::opt::ArgList &args) {
+  cmdArgs.clear();
+  for (const llvm::opt::Arg *arg : args) {
+    StringRef str(args.getArgString(arg->getIndex()));
+    cmdArgs.insert(cmdArgs.end(), str.begin(), str.end());
+    cmdArgs.push_back('\0');
+  }
+}
+
 CommonLinkerContext &lld::commonContext() {
   assert(lctx);
   return *lctx;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index f4b7d1c9d5b9736..770ab73a2da689a 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -573,6 +573,7 @@ constexpr const char *saveTempsValues[] = {
 void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   ELFOptTable parser;
   opt::InputArgList args = parser.parse(argsArr.slice(1));
+  commonContext().storeCmdArgs(args);
 
   // Interpret these flags early because error()/warn() depend on them.
   errorHandler().errorLimit = args::getInteger(args, OPT_error_limit, 20);
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index c39c6e6ea74ba33..33927c7f4ea7059 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -54,8 +54,7 @@ static lto::Config createConfig() {
   // LLD supports the new relocations and address-significance tables.
   c.Options = initTargetOptionsFromCodeGenFlags();
   c.Options.EmitAddrsig = true;
-  for (StringRef C : config->mllvmOpts)
-    c.MllvmArgs.emplace_back(C.str());
+  c.EmbedCmdArgs = commonContext().cmdArgs;
 
   // Always emit a section per function/datum with LTO.
   c.Options.FunctionSections = true;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 411fbcfcf233eb8..7a9e4556bb83874 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1437,6 +1437,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
 
   MachOOptTable parser;
   InputArgList args = parser.parse(argsArr.slice(1));
+  commonContext().storeCmdArgs(args);
 
   ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now "
                                  "(use --error-limit=0 to see all errors)";
diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
index 7a9a9223a03227f..ab65615a1947730 100644
--- a/lld/MachO/LTO.cpp
+++ b/lld/MachO/LTO.cpp
@@ -42,8 +42,7 @@ static lto::Config createConfig() {
   lto::Config c;
   c.Options = initTargetOptionsFromCodeGenFlags();
   c.Options.EmitAddrsig = config->icfLevel == ICFLevel::safe;
-  for (StringRef C : config->mllvmOpts)
-    c.MllvmArgs.emplace_back(C.str());
+  c.EmbedCmdArgs = commonContext().cmdArgs;
   c.CodeModel = getCodeModelFromCMModel();
   c.CPU = getCPUStr();
   c.MAttrs = getMAttrs();
diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp
index 4752d92e3b1d71d..3f76115e507efc9 100644
--- a/lld/MinGW/Driver.cpp
+++ b/lld/MinGW/Driver.cpp
@@ -174,6 +174,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
 
   MinGWOptTable parser;
   opt::InputArgList args = parser.parse(argsArr.slice(1));
+  commonContext().storeCmdArgs(args);
 
   if (errorCount())
     return false;
diff --git a/lld/include/lld/Common/CommonLinkerContext.h b/lld/include/lld/Common/CommonLinkerContext.h
index 0627bbdc8bd877c..e52387ff64f33f4 100644
--- a/lld/include/lld/Common/CommonLinkerContext.h
+++ b/lld/include/lld/Common/CommonLinkerContext.h
@@ -22,6 +22,7 @@
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
 #include "llvm/Support/StringSaver.h"
+#include "llvm/Option/ArgList.h"
 
 namespace llvm {
 class raw_ostream;
@@ -33,12 +34,14 @@ class CommonLinkerContext {
 public:
   CommonLinkerContext();
   virtual ~CommonLinkerContext();
+  void storeCmdArgs(const llvm::opt::ArgList &args);
 
   static void destroy();
 
   llvm::BumpPtrAllocator bAlloc;
   llvm::StringSaver saver{bAlloc};
   llvm::DenseMap<void *, SpecificAllocBase *> instances;
+  std::vector<uint8_t> cmdArgs;
 
   ErrorHandler e;
 };
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 635f19f78b15e67..78003eefe3df0fc 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -1146,6 +1146,7 @@ static void checkZOptions(opt::InputArgList &args) {
 void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   WasmOptTable parser;
   opt::InputArgList args = parser.parse(argsArr.slice(1));
+  commonContext().storeCmdArgs(args);
 
   // Interpret these flags early because error()/warn() depend on them.
   errorHandler().errorLimit = args::getInteger(args, OPT_error_limit, 20);
diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp
index e523f0f61715353..c92ffc93ecd15c7 100644
--- a/lld/wasm/LTO.cpp
+++ b/lld/wasm/LTO.cpp
@@ -11,6 +11,7 @@
 #include "InputFiles.h"
 #include "Symbols.h"
 #include "lld/Common/Args.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Strings.h"
 #include "lld/Common/TargetOptionsCommandFlags.h"
@@ -40,6 +41,7 @@ using namespace llvm;
 namespace lld::wasm {
 static std::unique_ptr<lto::LTO> createLTO() {
   lto::Config c;
+  c.EmbedCmdArgs = commonContext().cmdArgs;
   c.Options = initTargetOptionsFromCodeGenFlags();
 
   // Always emit a section per function/data with LTO.
diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index 6fb55f1cf1686a5..613cb2444686e90 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -48,7 +48,7 @@ struct Config {
   std::string CPU;
   TargetOptions Options;
   std::vector<std::string> MAttrs;
-  std::vector<std::string> MllvmArgs;
+  std::vector<uint8_t> EmbedCmdArgs;
   std::vector<std::string> PassPlugins;
   /// For adding passes that run right before codegen.
   std::function<void(legacy::PassManager &)> PreCodeGenPassesHook;
diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h
index de89f4bb10dff26..1c6bd761b4a0652 100644
--- a/llvm/include/llvm/LTO/LTOBackend.h
+++ b/llvm/include/llvm/LTO/LTOBackend.h
@@ -36,8 +36,7 @@ namespace lto {
 /// Runs middle-end LTO optimizations on \p Mod.
 bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
          bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
-         const ModuleSummaryIndex *ImportSummary,
-         const std::vector<uint8_t> &CmdArgs);
+         const ModuleSummaryIndex *ImportSummary);
 
 /// Runs a regular LTO backend. The regular LTO backend can also act as the
 /// regular LTO phase of ThinLTO, which may need to access the combined index.
@@ -55,8 +54,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
                   Module &M, const ModuleSummaryIndex &CombinedIndex,
                   const FunctionImporter::ImportMapTy &ImportList,
                   const GVSummaryMapTy &DefinedGlobals,
-                  MapVector<StringRef, BitcodeModule> *ModuleMap,
-                  const std::vector<uint8_t> &CmdArgs = std::vector<uint8_t>());
+                  MapVector<StringRef, BitcodeModule> *ModuleMap);
 
 Error finalizeOptimizationRemarks(
     std::unique_ptr<ToolOutputFile> DiagOutputFile);
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index a5fc267b1883bfe..ab258415182796d 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -65,6 +65,7 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SHA1.h"
 #include "llvm/Support/raw_ostream.h"
@@ -5201,6 +5202,19 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
   if (Used)
     Used->eraseFromParent();
 
+  // Add the command line and working directory to the module flags
+  if (!CmdArgs.empty()) {
+    M.setModuleFlag(llvm::Module::Warning, "embed.cmd",
+                    llvm::MDString::get(M.getContext(),
+                                        StringRef((const char *)CmdArgs.data(),
+                                                  CmdArgs.size())));
+    SmallString<256> cwd;
+    if (!llvm::sys::fs::current_path(cwd)) {
+      M.setModuleFlag(llvm::Module::Warning, "embed.cwd",
+                      llvm::MDString::get(M.getContext(), cwd));
+    }
+  }
+
   // Embed the bitcode for the llvm module.
   std::string Data;
   ArrayRef<uint8_t> ModuleData;
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index b38c568d10cd097..d232b09f7f8a224 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -140,8 +140,7 @@ void llvm::computeLTOCacheKey(
     AddUnsigned(*Conf.CodeModel);
   else
     AddUnsigned(-1);
-  for (const auto &S : Conf.MllvmArgs)
-    AddString(S);
+  Hasher.update(Conf.EmbedCmdArgs);
   AddUnsigned(static_cast<int>(Conf.CGOptLevel));
   AddUnsigned(static_cast<int>(Conf.CGFileType));
   AddUnsigned(Conf.OptLevel);
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index ccc4276e36dacf0..4f7935faf0621d1 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -342,24 +342,16 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
 
 bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
               bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
-              const ModuleSummaryIndex *ImportSummary,
-              const std::vector<uint8_t> &CmdArgs) {
+              const ModuleSummaryIndex *ImportSummary) {
   if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) {
-    // FIXME: the motivation for capturing post-merge bitcode and command line
-    // is replicating the compilation environment from bitcode, without needing
-    // to understand the dependencies (the functions to be imported). This
-    // assumes a clang - based invocation, case in which we have the command
-    // line.
-    // It's not very clear how the above motivation would map in the
-    // linker-based case, so we currently don't plumb the command line args in
-    // that case.
-    if (CmdArgs.empty())
+    if (Conf.EmbedCmdArgs.empty())
       LLVM_DEBUG(
           dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but "
                     "command line arguments are not available");
     llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
-                               /*EmbedBitcode*/ true, /*EmbedCmdline*/ true,
-                               /*Cmdline*/ CmdArgs);
+                               /*EmbedBitcode*/ true,
+                               /*EmbedCmdline*/ true,
+                               /*CmdArgs*/ Conf.EmbedCmdArgs);
   }
   // FIXME: Plumb the combined index into the new pass manager.
   runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary,
@@ -373,11 +365,17 @@ static void codegen(const Config &Conf, TargetMachine *TM,
   if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
     return;
 
-  if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized)
+  if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized) {
+    if (Conf.EmbedCmdArgs.empty())
+      LLVM_DEBUG(
+          dbgs()
+          << "Post-(Thin)LTO optimize bitcode embedding was requested, but "
+             "command line arguments are not available");
     llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
                                /*EmbedBitcode*/ true,
-                               /*EmbedCmdline*/ false,
-                               /*CmdArgs*/ std::vector<uint8_t>());
+                               /*EmbedCmdline*/ true,
+                               /*CmdArgs*/ Conf.EmbedCmdArgs);
+  }
 
   std::unique_ptr<ToolOutputFile> DwoOut;
   SmallString<1024> DwoFile(Conf.SplitDwarfOutput);
@@ -513,8 +511,7 @@ Error lto::backend(const Config &C, AddStreamFn AddStream,
   LLVM_DEBUG(dbgs() << "Running regular LTO\n");
   if (!C.CodeGenOnly) {
     if (!opt(C, TM.get(), 0, Mod, /*IsThinLTO=*/false,
-             /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr,
-             /*CmdArgs*/ std::vector<uint8_t>()))
+             /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr))
       return Error::success();
   }
 
@@ -552,8 +549,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
                        Module &Mod, const ModuleSummaryIndex &CombinedIndex,
                        const FunctionImporter::ImportMapTy &ImportList,
                        const GVSummaryMapTy &DefinedGlobals,
-                       MapVector<StringRef, BitcodeModule> *ModuleMap,
-                       const std::vector<uint8_t> &CmdArgs) {
+                       MapVector<StringRef, BitcodeModule> *ModuleMap) {
   Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
   if (!TOrErr)
     return TOrErr.takeError();
@@ -586,8 +582,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
       [&](Module &Mod, TargetMachine *TM,
           std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) {
         if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
-                 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
-                 CmdArgs))
+                 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex))
           return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
 
         codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index 52d8fff14be9cec..c5ec7856490ed9f 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -637,8 +637,7 @@ bool LTOCodeGenerator::optimize() {
   ModuleSummaryIndex CombinedIndex(false);
   TargetMach = createTargetMachine();
   if (!opt(Config, TargetMach.get(), 0, *MergedModule, /*IsThinLTO=*/false,
-           /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr,
-           /*CmdArgs*/ std::vector<uint8_t>())) {
+           /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr)) {
     emitError("LTO middle-end optimizations failed");
     return false;
   }



More information about the cfe-commits mailing list