[lld] 87b6fd3 - [lld-macho] Add support for creating and reading reexported dylibs

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Tue May 12 07:52:43 PDT 2020


Author: Jez Ng
Date: 2020-05-12T07:52:03-07:00
New Revision: 87b6fd3e02c696b917a75750dea8e33ce00246ec

URL: https://github.com/llvm/llvm-project/commit/87b6fd3e02c696b917a75750dea8e33ce00246ec
DIFF: https://github.com/llvm/llvm-project/commit/87b6fd3e02c696b917a75750dea8e33ce00246ec.diff

LOG: [lld-macho] Add support for creating and reading reexported dylibs

This unblocks the linking of real programs, since many core system
functions are only available as sub-libraries of libSystem.

Differential Revision: https://reviews.llvm.org/D79228

Added: 
    lld/test/MachO/sub-library.s

Modified: 
    lld/MachO/Config.h
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/Options.td
    lld/MachO/SyntheticSections.cpp
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 5a3566fd09ce..81832f30e0c6 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -21,9 +21,10 @@ class Symbol;
 
 struct Configuration {
   Symbol *entry;
-  llvm::MachO::HeaderFileType outputType;
+  bool hasReexports = false;
   llvm::StringRef installName;
   llvm::StringRef outputFile;
+  llvm::MachO::HeaderFileType outputType;
   std::vector<llvm::StringRef> searchPaths;
 };
 

diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 3accfee662f2..858969bb85fe 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
 
 using namespace llvm;
 using namespace llvm::MachO;
@@ -115,6 +116,21 @@ static void addFile(StringRef path) {
   }
 }
 
+// We expect sub-library names of the form "libfoo", which will match a dylib
+// with a path of .*/libfoo.dylib.
+static bool markSubLibrary(StringRef searchName) {
+  for (InputFile *file : inputFiles) {
+    if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
+      StringRef filename = path::filename(dylibFile->getName());
+      if (filename.consume_front(searchName) && filename == ".dylib") {
+        dylibFile->reexport = true;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
                  raw_ostream &stdoutOS, raw_ostream &stderrOS) {
   lld::stdoutOS = &stdoutOS;
@@ -158,6 +174,15 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
     }
   }
 
+  // Now that all dylibs have been loaded, search for those that should be
+  // re-exported.
+  for (opt::Arg *arg : args.filtered(OPT_sub_library)) {
+    config->hasReexports = true;
+    StringRef searchName = arg->getValue();
+    if (!markSubLibrary(searchName))
+      error("-sub_library " + searchName + " does not match a supplied dylib");
+  }
+
   // dyld requires us to load libSystem. Since we may run tests on non-OSX
   // systems which do not have libSystem, we mock it out here.
   // TODO: Replace this with a stub tbd file once we have TAPI support.

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index e836f4d79962..7bb070843696 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -42,6 +42,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InputFiles.h"
+#include "Config.h"
 #include "ExportTrie.h"
 #include "InputSection.h"
 #include "OutputSection.h"
@@ -54,10 +55,12 @@
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
 
 using namespace llvm;
 using namespace llvm::MachO;
 using namespace llvm::support::endian;
+using namespace llvm::sys;
 using namespace lld;
 using namespace lld::macho;
 
@@ -236,7 +239,11 @@ ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) {
   }
 }
 
-DylibFile::DylibFile(MemoryBufferRef mb) : InputFile(DylibKind, mb) {
+DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
+    : InputFile(DylibKind, mb) {
+  if (umbrella == nullptr)
+    umbrella = this;
+
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
   auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
 
@@ -254,10 +261,34 @@ DylibFile::DylibFile(MemoryBufferRef mb) : InputFile(DylibKind, mb) {
     auto *c = reinterpret_cast<const dyld_info_command *>(cmd);
     parseTrie(buf + c->export_off, c->export_size,
               [&](const Twine &name, uint64_t flags) {
-                symbols.push_back(symtab->addDylib(saver.save(name), this));
+                symbols.push_back(symtab->addDylib(saver.save(name), umbrella));
               });
   } else {
     error("LC_DYLD_INFO_ONLY not found in " + getName());
+    return;
+  }
+
+  if (hdr->flags & MH_NO_REEXPORTED_DYLIBS)
+    return;
+
+  const uint8_t *p =
+      reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64);
+  for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
+    auto *cmd = reinterpret_cast<const load_command *>(p);
+    p += cmd->cmdsize;
+    if (cmd->cmd != LC_REEXPORT_DYLIB)
+      continue;
+
+    auto *c = reinterpret_cast<const dylib_command *>(cmd);
+    StringRef reexportPath =
+        reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
+    // TODO: Expand @loader_path, @executable_path etc in reexportPath
+    Optional<MemoryBufferRef> buffer = readFile(reexportPath);
+    if (!buffer) {
+      error("unable to read re-exported dylib at " + reexportPath);
+      return;
+    }
+    reexported.push_back(make<DylibFile>(*buffer, umbrella));
   }
 }
 

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 43bfd66e8f73..6d760d4c3b50 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -60,7 +60,14 @@ class ObjFile : public InputFile {
 // .dylib file
 class DylibFile : public InputFile {
 public:
-  explicit DylibFile(MemoryBufferRef mb);
+  // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
+  // symbols in those sub-libraries will be available under the umbrella
+  // library's namespace. Those sub-libraries can also have their own
+  // re-exports. When loading a re-exported dylib, `umbrella` should be set to
+  // the root dylib to ensure symbols in the child library are correctly bound
+  // to the root. On the other hand, if a dylib is being directly loaded
+  // (through an -lfoo flag), then `umbrella` should be a nullptr.
+  explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella = nullptr);
   static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
 
   // Do not use this constructor!! This is meant only for createLibSystemMock(),
@@ -70,6 +77,8 @@ class DylibFile : public InputFile {
 
   StringRef dylibName;
   uint64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
+  bool reexport = false;
+  std::vector<DylibFile *> reexported;
 };
 
 extern std::vector<InputFile *> inputFiles;

diff  --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 8327bb9d4abc..3068dc31a33b 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -23,6 +23,9 @@ def l: Joined<["-"], "l">, MetaVarName<"<libname>">,
 def o: Separate<["-"], "o">, MetaVarName<"<path>">,
   HelpText<"Path to file to write output">;
 
+def sub_library: Separate<["-"], "sub_library">, MetaVarName<"<libname>">,
+  HelpText<"Re-export the specified dylib">;
+
 def v: Flag<["-"], "v">, HelpText<"Display the version number and exit">;
 
 // Ignored options

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 9a7b4241b827..49f213a1e603 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -57,7 +57,7 @@ void MachHeaderSection::writeTo(uint8_t *buf) const {
   hdr->ncmds = loadCommands.size();
   hdr->sizeofcmds = sizeOfCmds;
   hdr->flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL;
-  if (config->outputType == MH_DYLIB)
+  if (config->outputType == MH_DYLIB && !config->hasReexports)
     hdr->flags |= MH_NO_REEXPORTED_DYLIBS;
 
   uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 999e63ae6773..7aa512a98687 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -191,9 +191,13 @@ class LCSymtab : public LoadCommand {
   StringTableSection *stringTableSection = nullptr;
 };
 
-class LCLoadDylib : public LoadCommand {
+// There are several dylib load commands that share the same structure:
+//   * LC_LOAD_DYLIB
+//   * LC_ID_DYLIB
+//   * LC_REEXPORT_DYLIB
+class LCDylib : public LoadCommand {
 public:
-  LCLoadDylib(StringRef path) : path(path) {}
+  LCDylib(LoadCommandType type, StringRef path) : type(type), path(path) {}
 
   uint32_t getSize() const override {
     return alignTo(sizeof(dylib_command) + path.size() + 1, 8);
@@ -203,7 +207,7 @@ class LCLoadDylib : public LoadCommand {
     auto *c = reinterpret_cast<dylib_command *>(buf);
     buf += sizeof(dylib_command);
 
-    c->cmd = LC_LOAD_DYLIB;
+    c->cmd = type;
     c->cmdsize = getSize();
     c->dylib.name = sizeof(dylib_command);
 
@@ -212,33 +216,10 @@ class LCLoadDylib : public LoadCommand {
   }
 
 private:
+  LoadCommandType type;
   StringRef path;
 };
 
-class LCIdDylib : public LoadCommand {
-public:
-  LCIdDylib(StringRef name) : name(name) {}
-
-  uint32_t getSize() const override {
-    return alignTo(sizeof(dylib_command) + name.size() + 1, 8);
-  }
-
-  void writeTo(uint8_t *buf) const override {
-    auto *c = reinterpret_cast<dylib_command *>(buf);
-    buf += sizeof(dylib_command);
-
-    c->cmd = LC_ID_DYLIB;
-    c->cmdsize = getSize();
-    c->dylib.name = sizeof(dylib_command);
-
-    memcpy(buf, name.data(), name.size());
-    buf[name.size()] = '\0';
-  }
-
-private:
-  StringRef name;
-};
-
 class LCLoadDylinker : public LoadCommand {
 public:
   uint32_t getSize() const override {
@@ -285,7 +266,8 @@ void Writer::createLoadCommands() {
     headerSection->addLoadCommand(make<LCLoadDylinker>());
     break;
   case MH_DYLIB:
-    headerSection->addLoadCommand(make<LCIdDylib>(config->installName));
+    headerSection->addLoadCommand(
+        make<LCDylib>(LC_ID_DYLIB, config->installName));
     break;
   default:
     llvm_unreachable("unhandled output file type");
@@ -300,8 +282,13 @@ void Writer::createLoadCommands() {
   uint64_t dylibOrdinal = 1;
   for (InputFile *file : inputFiles) {
     if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
-      headerSection->addLoadCommand(make<LCLoadDylib>(dylibFile->dylibName));
+      headerSection->addLoadCommand(
+          make<LCDylib>(LC_LOAD_DYLIB, dylibFile->dylibName));
       dylibFile->ordinal = dylibOrdinal++;
+
+      if (dylibFile->reexport)
+        headerSection->addLoadCommand(
+            make<LCDylib>(LC_REEXPORT_DYLIB, dylibFile->dylibName));
     }
   }
 }

diff  --git a/lld/test/MachO/sub-library.s b/lld/test/MachO/sub-library.s
new file mode 100644
index 000000000000..e858eaf0bff5
--- /dev/null
+++ b/lld/test/MachO/sub-library.s
@@ -0,0 +1,74 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+
+## Create a libsuper that has libgoodbye as a sub-library, which in turn has
+## libhello as another sub-library.
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libhello.s \
+# RUN:   -o %t/libhello.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libgoodbye.s \
+# RUN:   -o %t/libgoodbye.o
+# RUN: echo "" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/libsuper.o
+# RUN: lld -flavor darwinnew -dylib %t/libhello.o -o %t/libhello.dylib
+# RUN: lld -flavor darwinnew -dylib -L%t -sub_library libhello -lhello \
+# RUN:   %t/libgoodbye.o -o %t/libgoodbye.dylib
+# RUN: lld -flavor darwinnew -dylib -L%t -sub_library libgoodbye -lgoodbye -install_name \
+# RUN:   @executable_path/libsuper.dylib %t/libsuper.o -o %t/libsuper.dylib
+
+
+## Check that they have the appropriate LC_REEXPORT_DYLIB commands, and that
+## NO_REEXPORTED_DYLIBS is (un)set as appropriate.
+
+# RUN: llvm-objdump --macho --all-headers %t/libhello.dylib | FileCheck %s \
+# RUN:   --check-prefix=HELLO-HEADERS
+# HELLO-HEADERS: NO_REEXPORTED_DYLIBS
+
+# RUN: llvm-objdump --macho --all-headers %t/libgoodbye.dylib | FileCheck %s -DDIR=%t \
+# RUN:   --check-prefix=GOODBYE-HEADERS
+# GOODBYE-HEADERS-NOT: NO_REEXPORTED_DYLIBS
+# GOODBYE-HEADERS:     cmd     LC_REEXPORT_DYLIB
+# GOODBYE-HEADERS-NOT: Load command
+# GOODBYE-HEADERS:     name    [[DIR]]/libhello.dylib
+
+# RUN: llvm-objdump --macho --all-headers %t/libsuper.dylib | FileCheck %s -DDIR=%t \
+# RUN:   --check-prefix=SUPER-HEADERS
+# SUPER-HEADERS-NOT: NO_REEXPORTED_DYLIBS
+# SUPER-HEADERS:     cmd     LC_REEXPORT_DYLIB
+# SUPER-HEADERS-NOT: Load command
+# SUPER-HEADERS:     name    [[DIR]]/libgoodbye.dylib
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/sub-library.o
+# RUN: lld -flavor darwinnew -o %t/sub-library -L%t -lsuper %t/sub-library.o
+
+# RUN: llvm-objdump --macho --bind %t/sub-library | FileCheck %s
+# CHECK-LABEL: Bind table:
+# CHECK-DAG:   __DATA_CONST __got {{.*}} libsuper _hello_world
+# CHECK-DAG:   __DATA_CONST __got {{.*}} libsuper _goodbye_world
+
+
+## Check that we fail gracefully if the sub-library is missing
+# RUN: not lld -flavor darwinnew -dylib -Z -o %t/sub-library -sub_library libmissing %t/sub-library.o 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=MISSING-SUB-LIBRARY
+# MISSING-SUB-LIBRARY: error: -sub_library libmissing does not match a supplied dylib
+# RUN: rm -f %t/libgoodbye.dylib
+# RUN: not lld -flavor darwinnew -o %t/sub-library -Z -L%t -lsuper %t/sub-library.o 2>&1 \
+# RUN:  | FileCheck %s --check-prefix=MISSING-REEXPORT -DDIR=%t
+# MISSING-REEXPORT: error: unable to read re-exported dylib at [[DIR]]/libgoodbye.dylib
+
+.text
+.globl _main
+
+_main:
+  movl $0x2000004, %eax # write() syscall
+  mov $1, %rdi # stdout
+  movq _hello_world@GOTPCREL(%rip), %rsi
+  mov $13, %rdx # length of str
+  syscall
+  mov $0, %rax
+
+  movl $0x2000004, %eax # write() syscall
+  mov $1, %rdi # stdout
+  movq _goodbye_world@GOTPCREL(%rip), %rsi
+  mov $15, %rdx # length of str
+  syscall
+  mov $0, %rax
+  ret


        


More information about the llvm-commits mailing list