[lld] 5a856f5 - Reland [lld-macho]Implement bundle_loader

Vy Nguyen via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 22 11:05:26 PST 2021


Author: Vy Nguyen
Date: 2021-02-22T14:05:12-05:00
New Revision: 5a856f5b44997dd45db08f434e5da1f85e4693f5

URL: https://github.com/llvm/llvm-project/commit/5a856f5b44997dd45db08f434e5da1f85e4693f5
DIFF: https://github.com/llvm/llvm-project/commit/5a856f5b44997dd45db08f434e5da1f85e4693f5.diff

LOG: Reland [lld-macho]Implement bundle_loader
  Reland 1a0afcf518717f61d45a1cdc6ad1a6436ec663b1
  https://reviews.llvm.org/D95913

New change: fix UB bug caused by copying empty path/name. (since the executable does not have a name)

Added: 
    lld/test/MachO/bundle-loader.s

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/Driver.h
    lld/MachO/DriverUtils.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/Options.td
    lld/MachO/SyntheticSections.cpp
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index eaa3b8f515e5..da99379c7198 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -261,7 +261,8 @@ static std::vector<ArchiveMember> getArchiveMembers(MemoryBufferRef mb) {
   return v;
 }
 
-static InputFile *addFile(StringRef path, bool forceLoadArchive) {
+static InputFile *addFile(StringRef path, bool forceLoadArchive,
+                          bool isBundleLoader = false) {
   Optional<MemoryBufferRef> buffer = readFile(path);
   if (!buffer)
     return nullptr;
@@ -325,6 +326,16 @@ static InputFile *addFile(StringRef path, bool forceLoadArchive) {
   case file_magic::bitcode:
     newFile = make<BitcodeFile>(mbref);
     break;
+  case file_magic::macho_executable:
+  case file_magic::macho_bundle:
+    // We only allow executable and bundle type here if it is used
+    // as a bundle loader.
+    if (!isBundleLoader)
+      error(path + ": unhandled file type");
+    if (Optional<DylibFile *> dylibFile =
+            loadDylib(mbref, nullptr, isBundleLoader))
+      newFile = *dylibFile;
+    break;
   default:
     error(path + ": unhandled file type");
   }
@@ -756,6 +767,11 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
   config->printEachFile = args.hasArg(OPT_t);
   config->printWhyLoad = args.hasArg(OPT_why_load);
   config->outputType = getOutputType(args);
+  if (const opt::Arg *arg = args.getLastArg(OPT_bundle_loader)) {
+    if (config->outputType != MH_BUNDLE)
+      error("-bundle_loader can only be used with MachO bundle output");
+    addFile(arg->getValue(), false, true);
+  }
   config->ltoObjPath = args.getLastArgValue(OPT_object_path_lto);
   config->ltoNewPassManager =
       args.hasFlag(OPT_no_lto_legacy_pass_manager, OPT_lto_legacy_pass_manager,
@@ -817,6 +833,7 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
     const auto &opt = arg->getOption();
     warnIfDeprecatedOption(opt);
     warnIfUnimplementedOption(opt);
+
     // TODO: are any of these better handled via filtered() or getLastArg()?
     switch (opt.getID()) {
     case OPT_INPUT:

diff  --git a/lld/MachO/Driver.h b/lld/MachO/Driver.h
index 08491f51261a..bf7abdda4e01 100644
--- a/lld/MachO/Driver.h
+++ b/lld/MachO/Driver.h
@@ -44,7 +44,8 @@ std::string createResponseFile(const llvm::opt::InputArgList &args);
 llvm::Optional<std::string> resolveDylibPath(llvm::StringRef path);
 
 llvm::Optional<DylibFile *> loadDylib(llvm::MemoryBufferRef mbref,
-                                      DylibFile *umbrella = nullptr);
+                                      DylibFile *umbrella = nullptr,
+                                      bool isBundleLoader = false);
 
 llvm::Optional<InputFile *> loadArchiveMember(MemoryBufferRef, uint32_t modTime,
                                               StringRef archiveName,

diff  --git a/lld/MachO/DriverUtils.cpp b/lld/MachO/DriverUtils.cpp
index 836b86346ab8..6d9413ea4e57 100644
--- a/lld/MachO/DriverUtils.cpp
+++ b/lld/MachO/DriverUtils.cpp
@@ -176,7 +176,8 @@ Optional<std::string> macho::resolveDylibPath(StringRef path) {
 static DenseMap<CachedHashStringRef, DylibFile *> loadedDylibs;
 
 Optional<DylibFile *> macho::loadDylib(MemoryBufferRef mbref,
-                                       DylibFile *umbrella) {
+                                       DylibFile *umbrella,
+                                       bool isBundleLoader) {
   StringRef path = mbref.getBufferIdentifier();
   DylibFile *&file = loadedDylibs[CachedHashStringRef(path)];
   if (file)
@@ -190,11 +191,13 @@ Optional<DylibFile *> macho::loadDylib(MemoryBufferRef mbref,
             ": " + toString(result.takeError()));
       return {};
     }
-    file = make<DylibFile>(**result, umbrella);
+    file = make<DylibFile>(**result, umbrella, isBundleLoader);
   } else {
     assert(magic == file_magic::macho_dynamically_linked_shared_lib ||
-           magic == file_magic::macho_dynamically_linked_shared_lib_stub);
-    file = make<DylibFile>(mbref, umbrella);
+           magic == file_magic::macho_dynamically_linked_shared_lib_stub ||
+           magic == file_magic::macho_executable ||
+           magic == file_magic::macho_bundle);
+    file = make<DylibFile>(mbref, umbrella, isBundleLoader);
   }
   return file;
 }

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 41b375575a0d..c10ae9159a36 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -605,8 +605,11 @@ void loadReexport(StringRef path, DylibFile *umbrella) {
     inputFiles.insert(*reexport);
 }
 
-DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
-    : InputFile(DylibKind, mb), refState(RefState::Unreferenced) {
+DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
+                     bool isBundleLoader)
+    : InputFile(DylibKind, mb), refState(RefState::Unreferenced),
+      isBundleLoader(isBundleLoader) {
+  assert(!isBundleLoader || !umbrella);
   if (umbrella == nullptr)
     umbrella = this;
 
@@ -619,7 +622,9 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
     currentVersion = read32le(&c->dylib.current_version);
     compatibilityVersion = read32le(&c->dylib.compatibility_version);
     dylibName = reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name);
-  } else {
+  } else if (!isBundleLoader) {
+    // macho_executable and macho_bundle don't have LC_ID_DYLIB,
+    // so it's OK.
     error("dylib " + toString(this) + " missing LC_ID_DYLIB load command");
     return;
   }
@@ -658,8 +663,12 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
   }
 }
 
-DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella)
-    : InputFile(DylibKind, interface), refState(RefState::Unreferenced) {
+DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
+                     bool isBundleLoader)
+    : InputFile(DylibKind, interface), refState(RefState::Unreferenced),
+      isBundleLoader(isBundleLoader) {
+  // FIXME: Add test for the missing TBD code path.
+
   if (umbrella == nullptr)
     umbrella = this;
 

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index ef573145f594..10935050584d 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -125,20 +125,28 @@ class DylibFile : public InputFile {
   // the root dylib to ensure symbols in the child library are correctly bound
   // to the root. On the other hand, if a dylib is being directly loaded
   // (through an -lfoo flag), then `umbrella` should be a nullptr.
-  explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella = nullptr);
+  explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella = nullptr,
+                     bool isBundleLoader = false);
 
   explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
-                     DylibFile *umbrella = nullptr);
+                     DylibFile *umbrella = nullptr,
+                     bool isBundleLoader = false);
 
   static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
 
   StringRef dylibName;
   uint32_t compatibilityVersion = 0;
   uint32_t currentVersion = 0;
-  uint64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
+  int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
   RefState refState;
   bool reexport = false;
   bool forceWeakImport = false;
+
+  // An executable can be used as a bundle loader that will load the output
+  // file being linked, and that contains symbols referenced, but not
+  // implemented in the bundle. When used like this, it is very similar
+  // to a Dylib, so we re-used the same class to represent it.
+  bool isBundleLoader;
 };
 
 // .a file

diff  --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index e2313a8d189e..615f07974f53 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -396,7 +396,6 @@ def grp_bundle : OptionGroup<"bundle">, HelpText<"CREATING A BUNDLE">;
 def bundle_loader : Separate<["-"], "bundle_loader">,
      MetaVarName<"<executable>">,
      HelpText<"Resolve undefined symbols from <executable>">,
-     Flags<[HelpHidden]>,
      Group<grp_bundle>;
 
 def grp_object : OptionGroup<"object">, HelpText<"CREATING AN OBJECT FILE">;

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 0026cd04dc9a..977a7af970fb 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -227,7 +227,7 @@ struct Binding {
   OutputSegment *segment = nullptr;
   uint64_t offset = 0;
   int64_t addend = 0;
-  uint8_t ordinal = 0;
+  int16_t ordinal = 0;
 };
 } // namespace
 
@@ -274,18 +274,24 @@ static void encodeBinding(const Symbol *sym, const OutputSection *osec,
 }
 
 // Non-weak bindings need to have their dylib ordinal encoded as well.
-static void encodeDylibOrdinal(const DylibSymbol *dysym, Binding &lastBinding,
+static void encodeDylibOrdinal(const DylibSymbol *dysym, Binding *lastBinding,
                                raw_svector_ostream &os) {
   using namespace llvm::MachO;
-  if (lastBinding.ordinal != dysym->getFile()->ordinal) {
-    if (dysym->getFile()->ordinal <= BIND_IMMEDIATE_MASK) {
+  if (lastBinding == nullptr ||
+      lastBinding->ordinal != dysym->getFile()->ordinal) {
+    if (dysym->getFile()->ordinal <= 0) {
+      os << static_cast<uint8_t>(
+          BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
+          (dysym->getFile()->ordinal & BIND_IMMEDIATE_MASK));
+    } else if (dysym->getFile()->ordinal <= BIND_IMMEDIATE_MASK) {
       os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
                                  dysym->getFile()->ordinal);
     } else {
       os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
       encodeULEB128(dysym->getFile()->ordinal, os);
     }
-    lastBinding.ordinal = dysym->getFile()->ordinal;
+    if (lastBinding != nullptr)
+      lastBinding->ordinal = dysym->getFile()->ordinal;
   }
 }
 
@@ -321,7 +327,7 @@ void BindingSection::finalizeContents() {
     return a.target.getVA() < b.target.getVA();
   });
   for (const BindingEntry &b : bindings) {
-    encodeDylibOrdinal(b.dysym, lastBinding, os);
+    encodeDylibOrdinal(b.dysym, &lastBinding, os);
     if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) {
       encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset,
                     b.addend, /*isWeakBinding=*/false, lastBinding, os);
@@ -543,13 +549,7 @@ uint32_t LazyBindingSection::encode(const DylibSymbol &sym) {
   uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr +
                     sym.stubsIndex * WordSize;
   encodeULEB128(offset, os);
-  if (sym.getFile()->ordinal <= MachO::BIND_IMMEDIATE_MASK) {
-    os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
-                               sym.getFile()->ordinal);
-  } else {
-    os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
-    encodeULEB128(sym.getFile()->ordinal, os);
-  }
+  encodeDylibOrdinal(&sym, nullptr, os);
 
   uint8_t flags = MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
   if (sym.isWeakRef())
@@ -810,7 +810,12 @@ void SymtabSection::writeTo(uint8_t *buf) const {
       nList->n_desc |= defined->isExternalWeakDef() ? MachO::N_WEAK_DEF : 0;
     } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) {
       uint16_t n_desc = nList->n_desc;
-      MachO::SET_LIBRARY_ORDINAL(n_desc, dysym->getFile()->ordinal);
+      if (dysym->getFile()->isBundleLoader)
+        MachO::SET_LIBRARY_ORDINAL(n_desc, MachO::EXECUTABLE_ORDINAL);
+      else
+        MachO::SET_LIBRARY_ORDINAL(
+            n_desc, static_cast<uint8_t>(dysym->getFile()->ordinal));
+
       nList->n_type = MachO::N_EXT;
       n_desc |= dysym->isWeakRef() ? MachO::N_WEAK_REF : 0;
       nList->n_desc = n_desc;

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 010ff827f0b8..b1c7d8c71009 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -497,9 +497,18 @@ void Writer::createLoadCommands() {
 
   in.header->addLoadCommand(make<LCBuildVersion>(config->platform));
 
-  uint64_t dylibOrdinal = 1;
+  int64_t dylibOrdinal = 1;
   for (InputFile *file : inputFiles) {
     if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
+      if (dylibFile->isBundleLoader) {
+        dylibFile->ordinal = MachO::BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
+        // Shortcut since bundle-loader does not re-export the symbols.
+
+        dylibFile->reexport = false;
+        continue;
+      }
+
+      dylibFile->ordinal = dylibOrdinal++;
       LoadCommandType lcType =
           dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak
               ? LC_LOAD_WEAK_DYLIB
@@ -507,7 +516,6 @@ void Writer::createLoadCommands() {
       in.header->addLoadCommand(make<LCDylib>(lcType, dylibFile->dylibName,
                                               dylibFile->compatibilityVersion,
                                               dylibFile->currentVersion));
-      dylibFile->ordinal = dylibOrdinal++;
 
       if (dylibFile->reexport)
         in.header->addLoadCommand(

diff  --git a/lld/test/MachO/bundle-loader.s b/lld/test/MachO/bundle-loader.s
new file mode 100644
index 000000000000..29ce230009b0
--- /dev/null
+++ b/lld/test/MachO/bundle-loader.s
@@ -0,0 +1,52 @@
+# REQUIRES: x86
+
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/main.s -o %t/main.o
+
+# RUN: %lld  -lSystem -dylib -install_name %t/my_lib.dylib -o %t/mylib.dylib %t/2.o
+# RUN: %lld %t/2.o %t/main.o -o %t/main
+# RUN: %lld -lSystem -bundle -bundle_loader %t/main -o %t/bundle.bundle %t/3.o %t/mylib.dylib
+# Check bundle.bundle to ensure the `my_func` symbol is from executable
+# RUN: llvm-nm -m %t/bundle.bundle |  FileCheck %s --check-prefix BUNDLE
+# BUNDLE: (undefined) external _main (from executable)
+# BUNDLE: (undefined) external my_func (from executable)
+# RUN: llvm-objdump  --macho --lazy-bind %t/bundle.bundle | FileCheck %s --check-prefix BUNDLE-OBJ
+# BUNDLE-OBJ: segment  section             address            dylib                 symbol
+# BUNDLE-OBJ: __DATA   __la_symbol_ptr     0x{{[0-9a-f]*}}    main-executable       my_fun
+
+
+# RUN: %lld -lSystem -bundle -bundle_loader %t/main -o %t/bundle2.bundle %t/3.o %t/2.o
+# Check bundle2.bundle to ensure that _main is still from executable
+# but my_func is not.
+# RUN: llvm-nm -m %t/bundle2.bundle | FileCheck %s --check-prefix BUNDLE2
+# BUNDLE2: (undefined) external _main (from executable)
+# BUNDLE2: (__TEXT,__text) external my_func
+
+# Test that bundle_loader can only be used with MachO bundle output.
+# RUN: not %lld -lSystem -bundle_loader %t/main -o %t/bundle3.bundle 2>&1 | FileCheck %s --check-prefix ERROR
+# ERROR: -bundle_loader can only be used with MachO bundle output
+
+#--- 2.s
+# my_lib: This contains the exported function
+.globl my_func
+my_func:
+  retq
+
+#--- 3.s
+# my_user.s: This is the user/caller of the
+#            exported function
+.text
+my_user:
+  callq my_func()
+  retq
+
+#--- main.s
+# main.s: dummy exec/main loads the exported function.
+# This is basically a way to say `my_user` should get
+# `my_func` from this executable.
+.globl _main
+.text
+ _main:
+  retq


        


More information about the llvm-commits mailing list