[lld] cafb6cd - [lld/mac] Add some support for dynamic lookup symbols, and implement -U

Nico Weber via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 26 13:51:02 PST 2021


Author: Nico Weber
Date: 2021-02-26T16:50:53-05:00
New Revision: cafb6cd10c7434fdb541a40eda89fc51880f43c0

URL: https://github.com/llvm/llvm-project/commit/cafb6cd10c7434fdb541a40eda89fc51880f43c0
DIFF: https://github.com/llvm/llvm-project/commit/cafb6cd10c7434fdb541a40eda89fc51880f43c0.diff

LOG: [lld/mac] Add some support for dynamic lookup symbols, and implement -U

Dynamic lookup symbols are symbols that work like dynamic symbols
in ELF: They're not bound to a dylib like normal Mach-O twolevel lookup
symbols, but they live in a global pool and dyld resolves them against
exported symbols from all loaded dylibs.

This adds support for dynamic lookup symbols to lld/mac. They are
represented as DylibSymbols with file set to nullptr.

This also uses this support to implement the -U flag, which makes
a specific symbol that's undefined at the end of the link a
dynamic lookup symbol.

For -U, it'd be sufficient to just do a pass over remaining undefined symbols
at the end of the link and to replace them with dynamic lookup symbols then.
But I'd like to use this code to implement flat_namespace too, and that will
require real support for resolving dynamic lookup symbols in SymbolTable. So
this patch adds this now already.

While writing tests for this, I noticed that we didn't set N_WEAK_DEF in the
symbol table for DylibSymbols, so this fixes that too.

Differential Revision: https://reviews.llvm.org/D97521

Added: 
    lld/test/MachO/U-dynamic-lookup.s

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/Options.td
    lld/MachO/SymbolTable.cpp
    lld/MachO/SymbolTable.h
    lld/MachO/Symbols.h
    lld/MachO/SyntheticSections.cpp
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index da99379c7198..df0eaac21e0c 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -758,6 +758,10 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
     config->explicitUndefineds.push_back(symtab->addUndefined(
         arg->getValue(), /*file=*/nullptr, /*isWeakRef=*/false));
   }
+
+  for (auto *arg : args.filtered(OPT_U))
+    symtab->addDynamicLookup(arg->getValue());
+
   config->outputFile = args.getLastArgValue(OPT_o, "a.out");
   config->installName =
       args.getLastArgValue(OPT_install_name, config->outputFile);

diff  --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 615f07974f53..743b9cb08e0b 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -460,7 +460,6 @@ def u : Separate<["-"], "u">,
 def U : Separate<["-"], "U">,
      MetaVarName<"<symbol>">,
      HelpText<"Allow <symbol> to have no definition">,
-     Flags<[HelpHidden]>,
      Group<grp_resolve>;
 def undefined : Separate<["-"], "undefined">,
      MetaVarName<"<treatment>">,

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 78e807b99b91..96ed3ef1e069 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -131,13 +131,20 @@ Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
     }
   }
 
+  bool isDynamicLookup = file == nullptr;
   if (wasInserted || isa<Undefined>(s) ||
-      (isa<DylibSymbol>(s) && !isWeakDef && s->isWeakDef()))
+      (isa<DylibSymbol>(s) &&
+       ((!isWeakDef && s->isWeakDef()) ||
+        (!isDynamicLookup && cast<DylibSymbol>(s)->isDynamicLookup()))))
     replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, refState, isTlv);
 
   return s;
 }
 
+Symbol *SymbolTable::addDynamicLookup(StringRef name) {
+  return addDylib(name, /*file=*/nullptr, /*isWeakDef=*/false, /*isTlv=*/false);
+}
+
 Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
                              const object::Archive::Symbol &sym) {
   Symbol *s;
@@ -158,7 +165,7 @@ Symbol *SymbolTable::addDSOHandle(const MachHeaderSection *header) {
   if (!wasInserted) {
     // FIXME: Make every symbol (including absolute symbols) contain a
     // reference to their originating file, then add that file name to this
-    // error message.
+    // error message. dynamic_lookup symbols don't have an originating file.
     if (isa<Defined>(s))
       error("found defined symbol with illegal name " + DSOHandle::name);
   }

diff  --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index 15a784429856..38e21617c0aa 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -43,6 +43,7 @@ class SymbolTable {
                     bool isPrivateExtern);
 
   Symbol *addDylib(StringRef name, DylibFile *file, bool isWeakDef, bool isTlv);
+  Symbol *addDynamicLookup(StringRef name);
 
   Symbol *addLazy(StringRef name, ArchiveFile *file,
                   const llvm::object::Archive::Symbol &sym);

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 32996015c5b5..dfd0378dea19 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -194,8 +194,13 @@ class DylibSymbol : public Symbol {
   bool isWeakRef() const override { return refState == RefState::Weak; }
   bool isReferenced() const { return refState != RefState::Unreferenced; }
   bool isTlv() const override { return tlv; }
+  bool isDynamicLookup() const { return file == nullptr; }
   bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
-  DylibFile *getFile() const { return cast<DylibFile>(file); }
+
+  DylibFile *getFile() const {
+    assert(!isDynamicLookup());
+    return cast<DylibFile>(file);
+  }
 
   static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
 

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 2a71308c84ad..7fd7a4918c1a 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -282,21 +282,25 @@ static void encodeBinding(const Symbol *sym, const OutputSection *osec,
 static void encodeDylibOrdinal(const DylibSymbol *dysym, Binding *lastBinding,
                                raw_svector_ostream &os) {
   using namespace llvm::MachO;
+
+  int16_t ordinal = dysym->isDynamicLookup() ? BIND_SPECIAL_DYLIB_FLAT_LOOKUP
+                                             : dysym->getFile()->ordinal;
+
   if (lastBinding == nullptr ||
-      lastBinding->ordinal != dysym->getFile()->ordinal) {
-    if (dysym->getFile()->ordinal <= 0) {
+      lastBinding->ordinal != ordinal) {
+    if (ordinal <= 0) {
       os << static_cast<uint8_t>(
           BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
-          (dysym->getFile()->ordinal & BIND_IMMEDIATE_MASK));
-    } else if (dysym->getFile()->ordinal <= BIND_IMMEDIATE_MASK) {
+          (ordinal & BIND_IMMEDIATE_MASK));
+    } else if (ordinal <= BIND_IMMEDIATE_MASK) {
       os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
-                                 dysym->getFile()->ordinal);
+                                 ordinal);
     } else {
       os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
-      encodeULEB128(dysym->getFile()->ordinal, os);
+      encodeULEB128(ordinal, os);
     }
     if (lastBinding != nullptr)
-      lastBinding->ordinal = dysym->getFile()->ordinal;
+      lastBinding->ordinal = ordinal;
   }
 }
 
@@ -815,13 +819,16 @@ void SymtabSection::writeTo(uint8_t *buf) const {
       nList->n_desc |= defined->isExternalWeakDef() ? MachO::N_WEAK_DEF : 0;
     } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) {
       uint16_t n_desc = nList->n_desc;
-      if (dysym->getFile()->isBundleLoader)
+      if (dysym->isDynamicLookup())
+        MachO::SET_LIBRARY_ORDINAL(n_desc, MachO::DYNAMIC_LOOKUP_ORDINAL);
+      else if (dysym->getFile()->isBundleLoader)
         MachO::SET_LIBRARY_ORDINAL(n_desc, MachO::EXECUTABLE_ORDINAL);
       else
         MachO::SET_LIBRARY_ORDINAL(
             n_desc, static_cast<uint8_t>(dysym->getFile()->ordinal));
 
       nList->n_type = MachO::N_EXT;
+      n_desc |= dysym->isWeakDef() ? MachO::N_WEAK_DEF : 0;
       n_desc |= dysym->isWeakRef() ? MachO::N_WEAK_REF : 0;
       nList->n_desc = n_desc;
     }

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 71f3a64eb7c4..a0b3ac4082af 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -472,6 +472,8 @@ void Writer::scanSymbols() {
       if (defined->overridesWeakDef)
         in.weakBinding->addNonWeakDefinition(defined);
     } else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
+      if (dysym->isDynamicLookup())
+        continue;
       dysym->getFile()->refState =
           std::max(dysym->getFile()->refState, dysym->refState);
     }

diff  --git a/lld/test/MachO/U-dynamic-lookup.s b/lld/test/MachO/U-dynamic-lookup.s
new file mode 100644
index 000000000000..823078e9a505
--- /dev/null
+++ b/lld/test/MachO/U-dynamic-lookup.s
@@ -0,0 +1,90 @@
+# REQUIRES: x86
+# RUN: rm -rf %t
+# RUN: split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/foo.o %t/foo.s
+# RUN: %lld -dylib -o %t/foo.dylib %t/foo.o
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s
+
+# _foo starts out as a (non-weak) dynamically looked up symbol and is merged
+# against the Undefined from main.o. _bar isn't referenced in any object file,
+# but starts out as Undefined because of the -u flag. _baz isn't referenced
+# at all.
+# RUN: %lld -lSystem %t/main.o -U _foo -U _bar -u _bar -U _baz -o %t/out
+# RUN: llvm-objdump --macho --lazy-bind %t/out | FileCheck --check-prefix=DYNAMIC %s
+# RUN: llvm-nm -m %t/out | FileCheck --check-prefix=DYNAMICSYM %s
+
+# Same thing should happen if _foo starts out as an Undefined.
+# `-U _foo` being passed twice shouldn't have an effect either.
+# RUN: %lld -lSystem %t/main.o -u _foo -U _foo -U _foo -u _bar -U _bar -U _baz -o %t/out
+# RUN: llvm-objdump --macho --lazy-bind %t/out | FileCheck --check-prefix=DYNAMIC %s
+# RUN: llvm-nm -m %t/out | FileCheck --check-prefix=DYNAMICSYM %s
+
+# Unreferenced dynamic lookup symbols don't make it into the bind tables, but
+# they do make it into the symbol table in ld64 if they're an undefined from -u
+# for some reason. lld happens to have the same behavior when no explicit code
+# handles this case, so match ld64's behavior.
+
+# DYNAMIC-NOT: _bar
+# DYNAMIC-NOT: _baz
+# DYNAMIC: flat-namespace   _foo
+
+# DYNAMICSYM:      (undefined) external _bar (dynamically looked up)
+# DYNAMICSYM-NOT:      (undefined) external _bar (dynamically looked up)
+# DYNAMICSYM-NEXT: (undefined) external _foo (dynamically looked up)
+
+# Test with a Defined. Here, foo.o provides _foo and the symbol doesn't need
+# to be imported.
+# RUN: %lld -lSystem %t/main.o %t/foo.o -U _foo -o %t/out
+# RUN: llvm-objdump --macho --lazy-bind %t/out | FileCheck --check-prefix=NOTDYNAMIC %s
+
+# NOTDYNAMIC-NOT: _foo
+
+# Here, foo.dylib provides _foo and the symbol doesn't need to be imported
+# dynamically.
+# RUN: %lld -lSystem %t/main.o %t/foo.dylib -U _foo -o %t/out
+# RUN: llvm-objdump --macho --lazy-bind %t/out | FileCheck --check-prefix=TWOLEVEL %s
+# RUN: llvm-nm -m %t/out | FileCheck --check-prefix=TWOLEVELSYM %s
+
+# TWOLEVEL: foo              _foo
+# TWOLEVELSYM: (undefined) external _foo (from foo)
+
+# Test resolving dynamic lookup symbol with weak defined.
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/weak-foo.o %t/weak-foo.s
+# RUN: %lld -dylib -o %t/weak-foo.dylib %t/weak-foo.o -U _foo
+# RUN: llvm-objdump --macho --bind --lazy-bind --weak-bind %t/weak-foo.dylib | FileCheck --check-prefix=WEAKDEF %s
+# RUN: llvm-nm -m %t/weak-foo.dylib | FileCheck --check-prefix=WEAKDEFSYM %s
+# WEAKDEF-NOT: _foo
+# WEAKDEFSYM: weak external _foo
+
+# Same if foo.dylib provides _foo weakly, except that the symbol is weak then.
+# RUN: %lld -lSystem %t/main.o %t/weak-foo.dylib -U _foo -o %t/out
+# RUN: llvm-objdump --macho --bind --lazy-bind --weak-bind %t/out | FileCheck --check-prefix=TWOLEVELWEAK %s
+# RUN: llvm-nm -m %t/out | FileCheck --check-prefix=TWOLEVELWEAKSYM %s
+
+# TWOLEVELWEAK-LABEL: Bind table:
+# TWOLEVELWEAK:       __DATA        __la_symbol_ptr 0x[[#%x,ADDR:]]   pointer 0 weak-foo    _foo
+# TWOLEVELWEAK-LABEL: Lazy bind table:
+# TWOLEVELWEAK-NOT:   weak-foo         _foo
+# TWOLEVELWEAK-LABEL: Weak bind table:
+# TWOLEVELWEAK:       __DATA   __la_symbol_ptr      0x[[#ADDR]]       pointer 0 _foo
+
+# TWOLEVELWEAKSYM: (undefined) weak external _foo (from weak-foo)
+
+#--- foo.s
+.globl _foo
+_foo:
+  ret
+
+#--- weak-foo.s
+.globl _foo
+.weak_definition _foo
+_foo:
+  ret
+
+#--- main.s
+.globl _main
+_main:
+  callq _foo
+  ret


        


More information about the llvm-commits mailing list