[clang] [lld] [llvm] Embed the command line arguments during LTO (PR #79390)

Duncan Ogilvie via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 9 05:20:21 PDT 2024


https://github.com/mrexodia updated https://github.com/llvm/llvm-project/pull/79390

>From e6737b6e65669160868a85b8a870fe6fd70b94b0 Mon Sep 17 00:00:00 2001
From: Duncan Ogilvie <mr.exodia.tpodt at gmail.com>
Date: Thu, 25 Jan 2024 00:08:49 +0100
Subject: [PATCH] Embed the command line arguments during LTO

---
 clang/lib/CodeGen/BackendUtil.cpp            |  3 +-
 lld/COFF/Driver.cpp                          |  1 +
 lld/COFF/LTO.cpp                             |  3 +-
 lld/Common/CommonLinkerContext.cpp           |  9 ++++++
 lld/ELF/Driver.cpp                           |  1 +
 lld/ELF/LTO.cpp                              |  3 +-
 lld/MachO/Driver.cpp                         |  1 +
 lld/MachO/LTO.cpp                            |  3 +-
 lld/MinGW/Driver.cpp                         |  1 +
 lld/include/lld/Common/CommonLinkerContext.h |  3 ++
 lld/wasm/Driver.cpp                          |  1 +
 lld/wasm/LTO.cpp                             |  2 ++
 llvm/include/llvm/LTO/Config.h               |  2 +-
 llvm/include/llvm/LTO/LTOBackend.h           |  6 ++--
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp    | 14 ++++++++++
 llvm/lib/LTO/LTO.cpp                         |  3 +-
 llvm/lib/LTO/LTOBackend.cpp                  | 29 ++++++--------------
 llvm/lib/LTO/LTOCodeGenerator.cpp            |  3 +-
 18 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index e765bbf637a661..e4f597798b06c8 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1222,6 +1222,7 @@ static void runThinLTOBackend(
   Conf.CPU = TOpts.CPU;
   Conf.CodeModel = getCodeModel(CGOpts);
   Conf.MAttrs = TOpts.Features;
+  Conf.EmbedCmdArgs = CGOpts.CmdArgs;
   Conf.RelocModel = CGOpts.RelocationModel;
   std::optional<CodeGenOptLevel> OptLevelOrNone =
       CodeGenOpt::getLevel(CGOpts.OptimizationLevel);
@@ -1283,7 +1284,7 @@ static void runThinLTOBackend(
   if (Error E =
           thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
                       ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
-                      /* ModuleMap */ nullptr, CGOpts.CmdArgs)) {
+                      /* ModuleMap */ nullptr)) {
     handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
       errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
     });
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 9e28b1c50be504..cc656f5188fea8 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1439,6 +1439,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   // Parse command line options.
   ArgParser parser(ctx);
   opt::InputArgList args = parser.parse(argsArr);
+  ctx.storeCmdArgs(args);
 
   // Initialize time trace profiler.
   config->timeTraceEnabled = args.hasArg(OPT_time_trace_eq);
diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index 5c881bc01c663d..a0b1af33eb8f00 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -52,8 +52,7 @@ lto::Config BitcodeCompiler::createConfig() {
   lto::Config c;
   c.Options = initTargetOptionsFromCodeGenFlags();
   c.Options.EmitAddrsig = true;
-  for (StringRef C : ctx.config.mllvmOpts)
-    c.MllvmArgs.emplace_back(C.str());
+  c.EmbedCmdArgs = context().cmdArgs;
 
   // Always emit a section per function/datum with LTO. LLVM LTO should get most
   // of the benefit of linker GC, but there are still opportunities for ICF.
diff --git a/lld/Common/CommonLinkerContext.cpp b/lld/Common/CommonLinkerContext.cpp
index 12f56bc10ec963..57aae8fd0a703d 100644
--- a/lld/Common/CommonLinkerContext.cpp
+++ b/lld/Common/CommonLinkerContext.cpp
@@ -37,6 +37,24 @@ CommonLinkerContext::~CommonLinkerContext() {
   lctx = nullptr;
 }
 
+// Records the original command line so LTO can embed it in the output.
+// Every input argument string is appended to cmdArgs followed by a NUL byte.
+// Walking the raw strings rather than the parsed Arg objects keeps the values
+// of separate-form options (e.g. the "foo" in "-o foo"), which sit at the
+// index after the one Arg::getIndex() reports.
+void CommonLinkerContext::storeCmdArgs(const llvm::opt::ArgList &args) {
+  cmdArgs.clear();
+  for (unsigned i = 0, e = args.getNumInputArgStrings(); i != e; ++i) {
+    const char *arg = args.getArgString(i);
+    // Defensive: skip null entries so they do not become empty arguments.
+    if (!arg)
+      continue;
+    StringRef str(arg);
+    cmdArgs.insert(cmdArgs.end(), str.begin(), str.end());
+    cmdArgs.push_back('\0');
+  }
+}
+
 CommonLinkerContext &lld::commonContext() {
   assert(lctx);
   return *lctx;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 8aa2380ba3a177..9623a35549537e 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -613,6 +613,7 @@ constexpr const char *saveTempsValues[] = {
 void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   ELFOptTable parser;
   opt::InputArgList args = parser.parse(argsArr.slice(1));
+  context().storeCmdArgs(args);
 
   // Interpret these flags early because error()/warn() depend on them.
   errorHandler().errorLimit = args::getInteger(args, OPT_error_limit, 20);
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 935d0a9eab9ee0..7d453cd187b9b4 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -54,8 +54,7 @@ static lto::Config createConfig() {
   // LLD supports the new relocations and address-significance tables.
   c.Options = initTargetOptionsFromCodeGenFlags();
   c.Options.EmitAddrsig = true;
-  for (StringRef C : config->mllvmOpts)
-    c.MllvmArgs.emplace_back(C.str());
+  c.EmbedCmdArgs = context().cmdArgs;
 
   // Always emit a section per function/datum with LTO.
   c.Options.FunctionSections = true;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index f3d2a93914f717..de99fdc6fcab28 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1501,6 +1501,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
 
   MachOOptTable parser;
   InputArgList args = parser.parse(argsArr.slice(1));
+  ctx->storeCmdArgs(args);
 
   ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now "
                                  "(use --error-limit=0 to see all errors)";
diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
index 7a9a9223a03227..c96f7969f20ed7 100644
--- a/lld/MachO/LTO.cpp
+++ b/lld/MachO/LTO.cpp
@@ -42,8 +42,7 @@ static lto::Config createConfig() {
   lto::Config c;
   c.Options = initTargetOptionsFromCodeGenFlags();
   c.Options.EmitAddrsig = config->icfLevel == ICFLevel::safe;
-  for (StringRef C : config->mllvmOpts)
-    c.MllvmArgs.emplace_back(C.str());
+  c.EmbedCmdArgs = context().cmdArgs;
   c.CodeModel = getCodeModelFromCMModel();
   c.CPU = getCPUStr();
   c.MAttrs = getMAttrs();
diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp
index 35fd478a21905e..f982ae0373593b 100644
--- a/lld/MinGW/Driver.cpp
+++ b/lld/MinGW/Driver.cpp
@@ -184,6 +184,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
 
   MinGWOptTable parser;
   opt::InputArgList args = parser.parse(argsArr.slice(1));
+  ctx->storeCmdArgs(args);
 
   if (errorCount())
     return false;
diff --git a/lld/include/lld/Common/CommonLinkerContext.h b/lld/include/lld/Common/CommonLinkerContext.h
index 0627bbdc8bd877..19a8212cae1954 100644
--- a/lld/include/lld/Common/CommonLinkerContext.h
+++ b/lld/include/lld/Common/CommonLinkerContext.h
@@ -21,6 +21,7 @@
 
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
+#include "llvm/Option/ArgList.h"
 #include "llvm/Support/StringSaver.h"
 
 namespace llvm {
@@ -33,12 +34,14 @@ class CommonLinkerContext {
 public:
   CommonLinkerContext();
   virtual ~CommonLinkerContext();
+  void storeCmdArgs(const llvm::opt::ArgList &args);
 
   static void destroy();
 
   llvm::BumpPtrAllocator bAlloc;
   llvm::StringSaver saver{bAlloc};
   llvm::DenseMap<void *, SpecificAllocBase *> instances;
+  std::vector<uint8_t> cmdArgs;
 
   ErrorHandler e;
 };
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 5368fe79b7eb89..3b842ade06d091 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -1186,6 +1186,7 @@ static void checkZOptions(opt::InputArgList &args) {
 void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   WasmOptTable parser;
   opt::InputArgList args = parser.parse(argsArr.slice(1));
+  context().storeCmdArgs(args);
 
   // Interpret these flags early because error()/warn() depend on them.
   errorHandler().errorLimit = args::getInteger(args, OPT_error_limit, 20);
diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp
index e523f0f6171535..6df526cbe18638 100644
--- a/lld/wasm/LTO.cpp
+++ b/lld/wasm/LTO.cpp
@@ -11,6 +11,7 @@
 #include "InputFiles.h"
 #include "Symbols.h"
 #include "lld/Common/Args.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Strings.h"
 #include "lld/Common/TargetOptionsCommandFlags.h"
@@ -40,6 +41,7 @@ using namespace llvm;
 namespace lld::wasm {
 static std::unique_ptr<lto::LTO> createLTO() {
   lto::Config c;
+  c.EmbedCmdArgs = context().cmdArgs;
   c.Options = initTargetOptionsFromCodeGenFlags();
 
   // Always emit a section per function/data with LTO.
diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index 482b6e55a19d35..20fcf5ea75b1d3 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -48,7 +48,7 @@ struct Config {
   std::string CPU;
   TargetOptions Options;
   std::vector<std::string> MAttrs;
-  std::vector<std::string> MllvmArgs;
+  std::vector<uint8_t> EmbedCmdArgs;
   std::vector<std::string> PassPlugins;
   /// For adding passes that run right before codegen.
   std::function<void(legacy::PassManager &)> PreCodeGenPassesHook;
diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h
index de89f4bb10dff2..1c6bd761b4a065 100644
--- a/llvm/include/llvm/LTO/LTOBackend.h
+++ b/llvm/include/llvm/LTO/LTOBackend.h
@@ -36,8 +36,7 @@ namespace lto {
 /// Runs middle-end LTO optimizations on \p Mod.
 bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
          bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
-         const ModuleSummaryIndex *ImportSummary,
-         const std::vector<uint8_t> &CmdArgs);
+         const ModuleSummaryIndex *ImportSummary);
 
 /// Runs a regular LTO backend. The regular LTO backend can also act as the
 /// regular LTO phase of ThinLTO, which may need to access the combined index.
@@ -55,8 +54,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
                   Module &M, const ModuleSummaryIndex &CombinedIndex,
                   const FunctionImporter::ImportMapTy &ImportList,
                   const GVSummaryMapTy &DefinedGlobals,
-                  MapVector<StringRef, BitcodeModule> *ModuleMap,
-                  const std::vector<uint8_t> &CmdArgs = std::vector<uint8_t>());
+                  MapVector<StringRef, BitcodeModule> *ModuleMap);
 
 Error finalizeOptimizationRemarks(
     std::unique_ptr<ToolOutputFile> DiagOutputFile);
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 33ec14b60dd288..408687f49ac703 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -66,6 +66,7 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SHA1.h"
 #include "llvm/Support/raw_ostream.h"
@@ -5403,6 +5404,19 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
   if (Used)
     Used->eraseFromParent();
 
+  // Record the command line and current working directory as module flags.
+  if (!CmdArgs.empty()) {
+    M.setModuleFlag(llvm::Module::Warning, "embed.cmd",
+                    llvm::MDString::get(M.getContext(),
+                                        StringRef((const char *)CmdArgs.data(),
+                                                  CmdArgs.size())));
+    SmallString<256> cwd;
+    if (!llvm::sys::fs::current_path(cwd)) {
+      M.setModuleFlag(llvm::Module::Warning, "embed.cwd",
+                      llvm::MDString::get(M.getContext(), cwd));
+    }
+  }
+
   // Embed the bitcode for the llvm module.
   std::string Data;
   ArrayRef<uint8_t> ModuleData;
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index bb3c9f7acdb8e5..29eb831ae46804 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -146,8 +146,7 @@ void llvm::computeLTOCacheKey(
     AddUnsigned(*Conf.CodeModel);
   else
     AddUnsigned(-1);
-  for (const auto &S : Conf.MllvmArgs)
-    AddString(S);
+  Hasher.update(Conf.EmbedCmdArgs);
   AddUnsigned(static_cast<int>(Conf.CGOptLevel));
   AddUnsigned(static_cast<int>(Conf.CGFileType));
   AddUnsigned(Conf.OptLevel);
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index effaed2d9bfb60..37996710d8fb07 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -350,24 +350,16 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
 
 bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
               bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
-              const ModuleSummaryIndex *ImportSummary,
-              const std::vector<uint8_t> &CmdArgs) {
+              const ModuleSummaryIndex *ImportSummary) {
   if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) {
-    // FIXME: the motivation for capturing post-merge bitcode and command line
-    // is replicating the compilation environment from bitcode, without needing
-    // to understand the dependencies (the functions to be imported). This
-    // assumes a clang - based invocation, case in which we have the command
-    // line.
-    // It's not very clear how the above motivation would map in the
-    // linker-based case, so we currently don't plumb the command line args in
-    // that case.
-    if (CmdArgs.empty())
+    if (Conf.EmbedCmdArgs.empty())
       LLVM_DEBUG(
           dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but "
                     "command line arguments are not available");
     llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
-                               /*EmbedBitcode*/ true, /*EmbedCmdline*/ true,
-                               /*Cmdline*/ CmdArgs);
+                               /*EmbedBitcode*/ true,
+                               /*EmbedCmdline*/ true,
+                               /*CmdArgs*/ Conf.EmbedCmdArgs);
   }
   // FIXME: Plumb the combined index into the new pass manager.
   runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary,
@@ -385,7 +377,7 @@ static void codegen(const Config &Conf, TargetMachine *TM,
     llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
                                /*EmbedBitcode*/ true,
                                /*EmbedCmdline*/ false,
-                               /*CmdArgs*/ std::vector<uint8_t>());
+                               /*CmdArgs*/ Conf.EmbedCmdArgs);
 
   std::unique_ptr<ToolOutputFile> DwoOut;
   SmallString<1024> DwoFile(Conf.SplitDwarfOutput);
@@ -525,8 +517,7 @@ Error lto::backend(const Config &C, AddStreamFn AddStream,
   LLVM_DEBUG(dbgs() << "Running regular LTO\n");
   if (!C.CodeGenOnly) {
     if (!opt(C, TM.get(), 0, Mod, /*IsThinLTO=*/false,
-             /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr,
-             /*CmdArgs*/ std::vector<uint8_t>()))
+             /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr))
       return Error::success();
   }
 
@@ -564,8 +555,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
                        Module &Mod, const ModuleSummaryIndex &CombinedIndex,
                        const FunctionImporter::ImportMapTy &ImportList,
                        const GVSummaryMapTy &DefinedGlobals,
-                       MapVector<StringRef, BitcodeModule> *ModuleMap,
-                       const std::vector<uint8_t> &CmdArgs) {
+                       MapVector<StringRef, BitcodeModule> *ModuleMap) {
   Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
   if (!TOrErr)
     return TOrErr.takeError();
@@ -598,8 +588,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
       [&](Module &Mod, TargetMachine *TM,
           std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) {
         if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
-                 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
-                 CmdArgs))
+                 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex))
           return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
 
         codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index 34aacb660144f4..4ef626ae5728a9 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -629,8 +629,7 @@ bool LTOCodeGenerator::optimize() {
   ModuleSummaryIndex CombinedIndex(false);
   TargetMach = createTargetMachine();
   if (!opt(Config, TargetMach.get(), 0, *MergedModule, /*IsThinLTO=*/false,
-           /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr,
-           /*CmdArgs*/ std::vector<uint8_t>())) {
+           /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr)) {
     emitError("LTO middle-end optimizations failed");
     return false;
   }



More information about the cfe-commits mailing list