[lld] 21f8311 - [lld-macho] Add very basic support for LTO

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 10 12:19:47 PST 2020


Author: Jez Ng
Date: 2020-11-10T12:19:28-08:00
New Revision: 21f831134c90671bd720533c1f46b88b6979300f

URL: https://github.com/llvm/llvm-project/commit/21f831134c90671bd720533c1f46b88b6979300f
DIFF: https://github.com/llvm/llvm-project/commit/21f831134c90671bd720533c1f46b88b6979300f.diff

LOG: [lld-macho] Add very basic support for LTO

Just enough to consume some bitcode files and link them. There's more
to be done around the symbol resolution API and the LTO config, but I don't yet
understand what all the various LTO settings do...

Reviewed By: #lld-macho, compnerd, smeenai, MaskRay

Differential Revision: https://reviews.llvm.org/D90663

Added: 
    lld/MachO/LTO.cpp
    lld/MachO/LTO.h
    lld/test/MachO/lto-save-temps.ll

Modified: 
    lld/MachO/CMakeLists.txt
    lld/MachO/Config.h
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/Options.td

Removed: 
    


################################################################################
diff  --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 880f8ddf7b98..6ddc88fb8618 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -12,6 +12,7 @@ add_lld_library(lldMachO2
   ExportTrie.cpp
   InputFiles.cpp
   InputSection.cpp
+  LTO.cpp
   MergedOutputSection.cpp
   ObjC.cpp
   OutputSection.cpp
@@ -26,8 +27,11 @@ add_lld_library(lldMachO2
   ${LLVM_TARGETS_TO_BUILD}
   BinaryFormat
   Core
+  LTO
+  MC
   Object
   Option
+  Passes
   Support
   TextAPI
 

diff  --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index f2ca0a049537..633dbb0184fc 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -39,6 +39,7 @@ struct Configuration {
   bool isPic = false;
   bool headerPadMaxInstallNames = false;
   bool searchDylibsFirst = false;
+  bool saveTemps = false;
   uint32_t headerPad;
   llvm::StringRef installName;
   llvm::StringRef outputFile;

diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 98bf5df5adac..6735611666a9 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -10,6 +10,7 @@
 #include "Config.h"
 #include "DriverUtils.h"
 #include "InputFiles.h"
+#include "LTO.h"
 #include "ObjC.h"
 #include "OutputSection.h"
 #include "OutputSegment.h"
@@ -30,6 +31,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/BinaryFormat/Magic.h"
+#include "llvm/LTO/LTO.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
@@ -37,6 +39,7 @@
 #include "llvm/Support/Host.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/TargetSelect.h"
 
 #include <algorithm>
 
@@ -316,16 +319,18 @@ static InputFile *addFile(StringRef path) {
     newFile = make<DylibFile>(mbref);
     break;
   case file_magic::tapi_file: {
-    Optional<DylibFile *> dylibFile = makeDylibFromTAPI(mbref);
-    if (!dylibFile)
-      return nullptr;
-    newFile = *dylibFile;
+    if (Optional<DylibFile *> dylibFile = makeDylibFromTAPI(mbref))
+      newFile = *dylibFile;
     break;
   }
+  case file_magic::bitcode:
+    newFile = make<BitcodeFile>(mbref);
+    break;
   default:
     error(path + ": unhandled file type");
   }
-  inputFiles.push_back(newFile);
+  if (newFile)
+    inputFiles.push_back(newFile);
   return newFile;
 }
 
@@ -455,6 +460,27 @@ static bool markSubLibrary(StringRef searchName) {
   return false;
 }
 
+// This function is called on startup. We need this for LTO since
+// LTO calls LLVM functions to compile bitcode files to native code.
+// Technically this can be delayed until we read bitcode files, but
+// we don't bother doing it lazily because the initialization is fast.
+static void initLLVM() {
+  InitializeAllTargets();
+  InitializeAllTargetMCs();
+  InitializeAllAsmPrinters();
+  InitializeAllAsmParsers();
+}
+
+static void compileBitcodeFiles() {
+  auto lto = make<BitcodeCompiler>();
+  for (InputFile *file : inputFiles)
+    if (auto *bitcodeFile = dyn_cast<BitcodeFile>(file))
+      lto->add(*bitcodeFile);
+
+  for (ObjFile *file : lto->compile())
+    inputFiles.push_back(file);
+}
+
 // Replaces common symbols with defined symbols residing in __common sections.
 // This function must be called after all symbol names are resolved (i.e. after
 // all InputFiles have been loaded.) As a result, later operations won't see
@@ -612,6 +638,8 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
     config->searchDylibsFirst =
         (arg && arg->getOption().getID() == OPT_search_dylibs_first);
 
+  config->saveTemps = args.hasArg(OPT_save_temps);
+
   if (args.hasArg(OPT_v)) {
     message(getLLDVersion());
     message(StringRef("Library search paths:") +
@@ -692,6 +720,8 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
       error("-sub_library " + searchName + " does not match a supplied dylib");
   }
 
+  initLLVM();
+  compileBitcodeFiles();
   replaceCommonSymbols();
 
   StringRef orderFile = args.getLastArgValue(OPT_order_file);

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 35211ea25e73..cd6fbc316fe1 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -58,6 +58,7 @@
 #include "lld/Common/Memory.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/LTO/LTO.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
@@ -565,6 +566,11 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
                      file->subsections.end());
 }
 
+BitcodeFile::BitcodeFile(MemoryBufferRef mbref)
+    : InputFile(BitcodeKind, mbref) {
+  obj = check(lto::InputFile::create(mbref));
+}
+
 // Returns "<internal>" or "baz.o".
 std::string lld::toString(const InputFile *file) {
   return file ? std::string(file->getName()) : "<internal>";

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 59b0e41d5de3..0cb7a10aa2bb 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -23,6 +23,12 @@
 #include <map>
 #include <vector>
 
+namespace llvm {
+namespace lto {
+class InputFile;
+} // namespace lto
+} // namespace llvm
+
 namespace lld {
 namespace macho {
 
@@ -39,9 +45,10 @@ class InputFile {
 public:
   enum Kind {
     ObjKind,
+    OpaqueKind,
     DylibKind,
     ArchiveKind,
-    OpaqueKind,
+    BitcodeKind,
   };
 
   virtual ~InputFile() = default;
@@ -127,6 +134,14 @@ class ArchiveFile : public InputFile {
   llvm::DenseSet<uint64_t> seen;
 };
 
+class BitcodeFile : public InputFile {
+public:
+  explicit BitcodeFile(MemoryBufferRef mb);
+  static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
+
+  std::unique_ptr<llvm::lto::InputFile> obj;
+};
+
 extern std::vector<InputFile *> inputFiles;
 
 llvm::Optional<MemoryBufferRef> readFile(StringRef path);

diff  --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
new file mode 100644
index 000000000000..bf0db6175690
--- /dev/null
+++ b/lld/MachO/LTO.cpp
@@ -0,0 +1,82 @@
+//===- LTO.cpp ------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LTO.h"
+#include "Config.h"
+#include "InputFiles.h"
+
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Strings.h"
+#include "lld/Common/TargetOptionsCommandFlags.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace lld;
+using namespace lld::macho;
+using namespace llvm;
+
+static lto::Config createConfig() {
+  lto::Config c;
+  c.Options = initTargetOptionsFromCodeGenFlags();
+  return c;
+}
+
+BitcodeCompiler::BitcodeCompiler() {
+  auto backend =
+      lto::createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
+  ltoObj = std::make_unique<lto::LTO>(createConfig(), backend);
+}
+
+void BitcodeCompiler::add(BitcodeFile &f) {
+  ArrayRef<lto::InputFile::Symbol> objSyms = f.obj->symbols();
+  std::vector<lto::SymbolResolution> resols;
+  resols.reserve(objSyms.size());
+
+  // Provide a resolution to the LTO API for each symbol.
+  for (const lto::InputFile::Symbol &objSym : objSyms) {
+    resols.emplace_back();
+    lto::SymbolResolution &r = resols.back();
+
+    // Ideally we shouldn't check for SF_Undefined but currently IRObjectFile
+    // reports two symbols for a symbol defined in module-level ASM. Without
+    // this check, lld flags an undefined in IR with a definition in ASM as
+    // prevailing. Once IRObjectFile is fixed to report only one symbol, this
+    // hack can be removed.
+    r.Prevailing = !objSym.isUndefined();
+
+    // TODO: set the other resolution configs properly
+    r.VisibleToRegularObj = true;
+  }
+  checkError(ltoObj->add(std::move(f.obj), resols));
+}
+
+// Merge all the bitcode files we have seen, codegen the result
+// and return the resulting ObjectFile(s).
+std::vector<ObjFile *> BitcodeCompiler::compile() {
+  unsigned maxTasks = ltoObj->getMaxTasks();
+  buf.resize(maxTasks);
+
+  checkError(ltoObj->run([&](size_t task) {
+    return std::make_unique<lto::NativeObjectStream>(
+        std::make_unique<raw_svector_ostream>(buf[task]));
+  }));
+
+  if (config->saveTemps) {
+    if (!buf[0].empty())
+      saveBuffer(buf[0], config->outputFile + ".lto.o");
+    for (unsigned i = 1; i != maxTasks; ++i)
+      saveBuffer(buf[i], config->outputFile + Twine(i) + ".lto.o");
+  }
+
+  std::vector<ObjFile *> ret;
+  for (unsigned i = 0; i != maxTasks; ++i)
+    if (!buf[i].empty())
+      ret.push_back(make<ObjFile>(MemoryBufferRef(buf[i], "lto.tmp")));
+
+  return ret;
+}

diff  --git a/lld/MachO/LTO.h b/lld/MachO/LTO.h
new file mode 100644
index 000000000000..2577374590b7
--- /dev/null
+++ b/lld/MachO/LTO.h
@@ -0,0 +1,43 @@
+//===- LTO.h ----------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_LTO_H
+#define LLD_MACHO_LTO_H
+
+#include "llvm/ADT/SmallString.h"
+#include <memory>
+#include <vector>
+
+namespace llvm {
+namespace lto {
+class LTO;
+} // namespace lto
+} // namespace llvm
+
+namespace lld {
+namespace macho {
+
+class BitcodeFile;
+class ObjFile;
+
+class BitcodeCompiler {
+public:
+  BitcodeCompiler();
+
+  void add(BitcodeFile &f);
+  std::vector<ObjFile *> compile();
+
+private:
+  std::unique_ptr<llvm::lto::LTO> ltoObj;
+  std::vector<llvm::SmallString<0>> buf;
+};
+
+} // namespace macho
+} // namespace lld
+
+#endif

diff  --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 353ef03455bf..cdbdbb26702e 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -453,6 +453,9 @@ def dependency_info : Separate<["-"], "dependency_info">,
      HelpText<"Dump dependency info">,
      Flags<[HelpHidden]>,
      Group<grp_introspect>;
+def save_temps : Flag<["-"], "save-temps">,
+     HelpText<"Save temporary files instead of deleting them">,
+     Group<grp_introspect>;
 
 def grp_symtab : OptionGroup<"symtab">, HelpText<"SYMBOL TABLE OPTIMIZATIONS">;
 
@@ -1233,10 +1236,6 @@ def random_uuid : Flag<["-"], "random_uuid">,
      HelpText<"This option is undocumented in ld64">,
      Flags<[HelpHidden]>,
      Group<grp_undocumented>;
-def save_temps : Flag<["-"], "save-temps">,
-     HelpText<"This option is undocumented in ld64">,
-     Flags<[HelpHidden]>,
-     Group<grp_undocumented>;
 def simulator_support : Flag<["-"], "simulator_support">,
      HelpText<"This option is undocumented in ld64">,
      Flags<[HelpHidden]>,

diff  --git a/lld/test/MachO/lto-save-temps.ll b/lld/test/MachO/lto-save-temps.ll
new file mode 100644
index 000000000000..63c196b4a73f
--- /dev/null
+++ b/lld/test/MachO/lto-save-temps.ll
@@ -0,0 +1,48 @@
+; REQUIRES: x86
+
+; Test that we compile regular LTO inputs in a single task but handle ThinLTO
+; modules in separate tasks.
+
+; RUN: rm -rf %t; split-file %s %t
+; RUN: llvm-as %t/foo.ll -o %t/foo.o
+; RUN: llvm-as %t/test.ll -o %t/test.o
+; RUN: %lld -save-temps %t/foo.o %t/test.o -o %t/test
+; RUN: llvm-objdump -d --no-show-raw-insn %t/test.lto.o | FileCheck %s --check-prefix=ALL
+; RUN: llvm-objdump -d --no-show-raw-insn %t/test | FileCheck %s --check-prefix=ALL
+
+; RUN: rm -rf %t; split-file %s %t
+; RUN: opt -module-summary %t/foo.ll -o %t/foo.o
+; RUN: opt -module-summary %t/test.ll -o %t/test.o
+; RUN: %lld -save-temps %t/foo.o %t/test.o -o %t/test
+; RUN: llvm-objdump -d --no-show-raw-insn %t/test1.lto.o | FileCheck %s --check-prefix=FOO
+; RUN: llvm-objdump -d --no-show-raw-insn %t/test2.lto.o | FileCheck %s --check-prefix=MAIN
+; RUN: llvm-objdump -d --no-show-raw-insn %t/test | FileCheck %s --check-prefix=ALL
+
+; FOO:      <_foo>:
+; FOO-NEXT: retq
+
+; MAIN:      <_main>:
+; MAIN-NEXT: retq
+
+; ALL:      <_foo>:
+; ALL-NEXT: retq
+; ALL:      <_main>:
+; ALL-NEXT: retq
+
+;--- foo.ll
+
+target triple = "x86_64-apple-macosx10.15.0"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo() {
+  ret void
+}
+
+;--- test.ll
+
+target triple = "x86_64-apple-macosx10.15.0"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @main() {
+  ret void
+}


        


More information about the llvm-commits mailing list