[lld] 7bbdbac - [lld-macho] Use export trie instead of symtab when linking against dylibs

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Sat May 9 20:58:22 PDT 2020


Author: Jez Ng
Date: 2020-05-09T20:56:22-07:00
New Revision: 7bbdbacd00ae04f9c0d609bf2c00036cafb55fef

URL: https://github.com/llvm/llvm-project/commit/7bbdbacd00ae04f9c0d609bf2c00036cafb55fef
DIFF: https://github.com/llvm/llvm-project/commit/7bbdbacd00ae04f9c0d609bf2c00036cafb55fef.diff

LOG: [lld-macho] Use export trie instead of symtab when linking against dylibs

Summary:
This allows us to link against stripped dylibs. Moreover, it's simply
more correct: The symbol table includes symbols that the dylib uses but
doesn't export.

This temporarily regresses our ability to do lazy symbol binding because
dyld_stub_binder isn't in libSystem's export trie. Rather, it is in one
of the sub-libraries libSystem re-exports. (This doesn't affect our
tests since we are mocking out dyld_stub_binder there.) A follow-up diff
will address this by adding support for sub-libraries.

Depends on D79114.

Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee

Subscribers: mgorny, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D79226

Added: 
    

Modified: 
    lld/MachO/ExportTrie.cpp
    lld/MachO/ExportTrie.h
    lld/MachO/InputFiles.cpp
    lld/test/CMakeLists.txt
    lld/test/MachO/dylink.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/ExportTrie.cpp b/lld/MachO/ExportTrie.cpp
index 871cf334d616..de8b42d73281 100644
--- a/lld/MachO/ExportTrie.cpp
+++ b/lld/MachO/ExportTrie.cpp
@@ -37,6 +37,7 @@
 #include "ExportTrie.h"
 #include "Symbols.h"
 
+#include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/BinaryFormat/MachO.h"
@@ -232,5 +233,57 @@ void TrieBuilder::writeTo(uint8_t *buf) const {
     node->writeTo(buf);
 }
 
+namespace {
+
+// Parse a serialized trie and invoke a callback for each entry.
+class TrieParser {
+public:
+  TrieParser(const uint8_t *buf, size_t size, const TrieEntryCallback &callback)
+      : start(buf), end(start + size), callback(callback) {}
+
+  void parse(const uint8_t *buf, const Twine &cumulativeString);
+
+  void parse() { parse(start, ""); }
+
+  const uint8_t *start;
+  const uint8_t *end;
+  const TrieEntryCallback &callback;
+};
+
+} // namespace
+
+void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) {
+  if (buf >= end)
+    fatal("Node offset points outside export section");
+
+  unsigned ulebSize;
+  uint64_t terminalSize = decodeULEB128(buf, &ulebSize);
+  buf += ulebSize;
+  uint64_t flags = 0;
+  size_t offset;
+  if (terminalSize != 0) {
+    flags = decodeULEB128(buf, &ulebSize);
+    callback(cumulativeString, flags);
+  }
+  buf += terminalSize;
+  uint8_t numEdges = *buf++;
+  for (uint8_t i = 0; i < numEdges; ++i) {
+    const char *cbuf = reinterpret_cast<const char *>(buf);
+    StringRef substring = StringRef(cbuf, strnlen(cbuf, end - buf));
+    buf += substring.size() + 1;
+    offset = decodeULEB128(buf, &ulebSize);
+    buf += ulebSize;
+    parse(start + offset, cumulativeString + substring);
+  }
+}
+
+void parseTrie(const uint8_t *buf, size_t size,
+               const TrieEntryCallback &callback) {
+  if (size == 0)
+    return;
+
+  TrieParser(buf, size, callback).parse();
+}
+
 } // namespace macho
 } // namespace lld

diff  --git a/lld/MachO/ExportTrie.h b/lld/MachO/ExportTrie.h
index a85728c59955..2bd8c33db9a0 100644
--- a/lld/MachO/ExportTrie.h
+++ b/lld/MachO/ExportTrie.h
@@ -10,6 +10,7 @@
 #define LLD_MACHO_EXPORT_TRIE_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
 
 #include <vector>
 
@@ -35,6 +36,11 @@ class TrieBuilder {
   std::vector<TrieNode *> nodes;
 };
 
+using TrieEntryCallback =
+    llvm::function_ref<void(const llvm::Twine & /*name*/, uint64_t /*flags*/)>;
+
+void parseTrie(const uint8_t *buf, size_t size, const TrieEntryCallback &);
+
 } // namespace macho
 } // namespace lld
 

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index eb8a30f753f4..f8fdd76ff50f 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -42,6 +42,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InputFiles.h"
+#include "ExportTrie.h"
 #include "InputSection.h"
 #include "OutputSection.h"
 #include "SymbolTable.h"
@@ -245,21 +246,14 @@ DylibFile::DylibFile(MemoryBufferRef mb) : InputFile(DylibKind, mb) {
   }
 
   // Initialize symbols.
-  if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
-    auto *c = reinterpret_cast<const symtab_command *>(cmd);
-    const char *strtab = reinterpret_cast<const char *>(buf + c->stroff);
-    ArrayRef<const nlist_64> nList(
-        reinterpret_cast<const nlist_64 *>(buf + c->symoff), c->nsyms);
-
-    symbols.reserve(c->nsyms);
-
-    for (const nlist_64 &sym : nList) {
-      StringRef name = strtab + sym.n_strx;
-      // TODO: Figure out what to do about undefined symbols: ignore or warn
-      // if unsatisfied? Also make sure we handle re-exported symbols
-      // correctly.
-      symbols.push_back(symtab->addDylib(name, this));
-    }
+  if (const load_command *cmd = findCommand(hdr, LC_DYLD_INFO_ONLY)) {
+    auto *c = reinterpret_cast<const dyld_info_command *>(cmd);
+    parseTrie(buf + c->export_off, c->export_size,
+              [&](const Twine &name, uint64_t flags) {
+                symbols.push_back(symtab->addDylib(saver.save(name), this));
+              });
+  } else {
+    error("LC_DYLD_INFO_ONLY not found in " + getName());
   }
 }
 

diff  --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt
index 150865245965..4fbd2534b5a9 100644
--- a/lld/test/CMakeLists.txt
+++ b/lld/test/CMakeLists.txt
@@ -36,8 +36,8 @@ if (NOT LLD_BUILT_STANDALONE)
   list(APPEND LLD_TEST_DEPS
     FileCheck count llc llvm-ar llvm-as llvm-bcanalyzer llvm-config llvm-cvtres
     llvm-dis llvm-dwarfdump llvm-lib llvm-lipo llvm-mc llvm-nm llvm-objcopy
-    llvm-objdump llvm-pdbutil llvm-readelf llvm-readobj not obj2yaml opt
-    yaml2obj
+    llvm-objdump llvm-pdbutil llvm-readelf llvm-readobj llvm-strip not obj2yaml
+    opt yaml2obj
     )
 endif()
 

diff  --git a/lld/test/MachO/dylink.s b/lld/test/MachO/dylink.s
index 26302013035d..e47d9ef8f271 100644
--- a/lld/test/MachO/dylink.s
+++ b/lld/test/MachO/dylink.s
@@ -8,6 +8,15 @@
 # RUN:   @executable_path/libhello.dylib %t/libhello.o -o %t/libhello.dylib
 # RUN: lld -flavor darwinnew -dylib -install_name \
 # RUN:   @executable_path/libgoodbye.dylib %t/libgoodbye.o -o %t/libgoodbye.dylib
+
+## Make sure we are using the export trie and not the symbol table when linking
+## against these dylibs.
+# RUN: llvm-strip %t/libhello.dylib
+# RUN: llvm-strip %t/libgoodbye.dylib
+# RUN: llvm-nm %t/libhello.dylib 2>&1 | FileCheck %s --check-prefix=NOSYM
+# RUN: llvm-nm %t/libgoodbye.dylib 2>&1 | FileCheck %s --check-prefix=NOSYM
+# NOSYM: no symbols
+
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/dylink.o
 # RUN: lld -flavor darwinnew -o %t/dylink -Z -L%t -lhello -lgoodbye %t/dylink.o
 # RUN: llvm-objdump --bind -d %t/dylink | FileCheck %s


        


More information about the llvm-commits mailing list