[lld] b84d72d - [lld-macho][NFC] Handle GOT bindings and regular bindings more uniformly

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 26 19:27:00 PDT 2020


Author: Jez Ng
Date: 2020-08-26T19:21:04-07:00
New Revision: b84d72d89324eeeec2336a4f4ff26bc5f0a39905

URL: https://github.com/llvm/llvm-project/commit/b84d72d89324eeeec2336a4f4ff26bc5f0a39905
DIFF: https://github.com/llvm/llvm-project/commit/b84d72d89324eeeec2336a4f4ff26bc5f0a39905.diff

LOG: [lld-macho][NFC] Handle GOT bindings and regular bindings more uniformly

Previously, the BindingEntry struct could only store bindings to offsets
within InputSections. Since the GOTSection and TLVPointerSections are
OutputSections, I handled those in a separate code path. However, this
makes it awkward to support weak bindings properly without code
duplication. This diff allows BindingEntries to point directly to
OutputSections, simplifying the upcoming weak binding implementation.

Along the way, I also converted a bunch of functions taking references
to symbols to take pointers instead. Given how much casting we do for
Symbol (especially in the upcoming weak binding diffs), it's cleaner
this way.

Differential Revision: https://reviews.llvm.org/D86571

Added: 
    lld/test/MachO/no-unneeded-dyld-info.s

Modified: 
    lld/MachO/Arch/X86_64.cpp
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/MachO/Target.h
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index cdfb8b871803..0c5abd6cc4f5 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -34,7 +34,7 @@ struct X86_64 : TargetInfo {
   void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
                             uint64_t entryAddr) const override;
 
-  void prepareSymbolRelocation(lld::macho::Symbol &, const InputSection *,
+  void prepareSymbolRelocation(lld::macho::Symbol *, const InputSection *,
                                const Reloc &) override;
   uint64_t resolveSymbolVA(uint8_t *buf, const lld::macho::Symbol &,
                            uint8_t type) const override;
@@ -217,25 +217,25 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
                    in.stubHelper->addr);
 }
 
-void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
+void X86_64::prepareSymbolRelocation(lld::macho::Symbol *sym,
                                      const InputSection *isec, const Reloc &r) {
   switch (r.type) {
   case X86_64_RELOC_GOT_LOAD:
     // TODO: implement mov -> lea relaxation for non-dynamic symbols
   case X86_64_RELOC_GOT:
     in.got->addEntry(sym);
-    if (sym.isTlv())
+    if (sym->isTlv())
       error("found GOT relocation referencing thread-local variable in " +
             toString(isec));
     break;
   case X86_64_RELOC_BRANCH: {
     // TODO: weak dysyms should go into the weak binding section instead
-    if (auto *dysym = dyn_cast<DylibSymbol>(&sym))
-      in.stubs->addEntry(*dysym);
+    if (auto *dysym = dyn_cast<DylibSymbol>(sym))
+      in.stubs->addEntry(dysym);
     break;
   }
   case X86_64_RELOC_UNSIGNED: {
-    if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) {
+    if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
       if (r.length != 3) {
         error("X86_64_RELOC_UNSIGNED referencing the dynamic symbol " +
               dysym->getName() + " must have r_length = 3");
@@ -251,17 +251,17 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
   case X86_64_RELOC_SIGNED_4:
     break;
   case X86_64_RELOC_TLV:
-    if (isa<DylibSymbol>(&sym)) {
+    if (isa<DylibSymbol>(sym)) {
       in.tlvPointers->addEntry(sym);
     } else {
-      assert(isa<Defined>(&sym));
+      assert(isa<Defined>(sym));
       // TLV relocations on x86_64 are always used with a movq opcode, which
       // can be converted to leaq opcodes if they reference a defined symbol.
       // (This is in contrast to GOT relocations, which can be used with
       // non-movq opcodes.) As such, there is no need to add an entry to
       // tlvPointers here.
     }
-    if (!sym.isTlv())
+    if (!sym->isTlv())
       error(
           "found X86_64_RELOC_TLV referencing a non-thread-local variable in " +
           toString(isec));

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 9876b9ddad37..c5c9bf3ea414 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -89,10 +89,13 @@ NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname,
   flags = MachO::S_NON_LAZY_SYMBOL_POINTERS;
 }
 
-void NonLazyPointerSectionBase::addEntry(Symbol &sym) {
-  if (entries.insert(&sym)) {
-    assert(sym.gotIndex == UINT32_MAX);
-    sym.gotIndex = entries.size() - 1;
+void NonLazyPointerSectionBase::addEntry(Symbol *sym) {
+  if (entries.insert(sym)) {
+    assert(sym->gotIndex == UINT32_MAX);
+    sym->gotIndex = entries.size() - 1;
+
+    if (auto *dysym = dyn_cast<DylibSymbol>(sym))
+      in.binding->addEntry(dysym, this, sym->gotIndex * WordSize);
   }
 }
 
@@ -105,11 +108,6 @@ void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const {
 BindingSection::BindingSection()
     : LinkEditSection(segment_names::linkEdit, section_names::binding) {}
 
-bool BindingSection::isNeeded() const {
-  return bindings.size() != 0 || in.got->isNeeded() ||
-         in.tlvPointers->isNeeded();
-}
-
 namespace {
 struct Binding {
   OutputSegment *segment = nullptr;
@@ -125,7 +123,7 @@ struct Binding {
 // The bind opcode "interpreter" remembers the values of each binding field, so
 // we only need to encode the 
diff erences between bindings. Hence the use of
 // lastBinding.
-static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
+static void encodeBinding(const DylibSymbol *dysym, const OutputSection *osec,
                           uint64_t outSecOff, int64_t addend,
                           Binding &lastBinding, raw_svector_ostream &os) {
   using namespace llvm::MachO;
@@ -143,15 +141,15 @@ static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
     lastBinding.offset = offset;
   }
 
-  if (lastBinding.ordinal != dysym.file->ordinal) {
-    if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) {
+  if (lastBinding.ordinal != dysym->file->ordinal) {
+    if (dysym->file->ordinal <= BIND_IMMEDIATE_MASK) {
       os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
-                                 dysym.file->ordinal);
+                                 dysym->file->ordinal);
     } else {
       os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
-      encodeULEB128(dysym.file->ordinal, os);
+      encodeULEB128(dysym->file->ordinal, os);
     }
-    lastBinding.ordinal = dysym.file->ordinal;
+    lastBinding.ordinal = dysym->file->ordinal;
   }
 
   if (lastBinding.addend != addend) {
@@ -161,27 +159,18 @@ static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
   }
 
   os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
-     << dysym.getName() << '\0'
+     << dysym->getName() << '\0'
      << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
      << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
   // DO_BIND causes dyld to both perform the binding and increment the offset
   lastBinding.offset += WordSize;
 }
 
-static bool encodeNonLazyPointerSection(NonLazyPointerSectionBase *osec,
-                                        Binding &lastBinding,
-                                        raw_svector_ostream &os) {
-  bool didEncode = false;
-  size_t idx = 0;
-  for (const Symbol *sym : osec->getEntries()) {
-    if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
-      didEncode = true;
-      encodeBinding(*dysym, osec, idx * WordSize, /*addend=*/0, lastBinding,
-                    os);
-    }
-    ++idx;
-  }
-  return didEncode;
+uint64_t BindingEntry::getVA() const {
+  if (auto *isec = section.dyn_cast<const InputSection *>())
+    return isec->getVA() + offset;
+  auto *osec = section.get<const OutputSection *>();
+  return osec->addr + offset;
 }
 
 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
@@ -200,30 +189,23 @@ static bool encodeNonLazyPointerSection(NonLazyPointerSectionBase *osec,
 void BindingSection::finalizeContents() {
   raw_svector_ostream os{contents};
   Binding lastBinding;
-  bool didEncode = encodeNonLazyPointerSection(in.got, lastBinding, os);
-  didEncode |= encodeNonLazyPointerSection(in.tlvPointers, lastBinding, os);
 
-  // Sorting the relocations by segment and address allows us to encode them
-  // more compactly.
+  // Since bindings are delta-encoded, sorting them allows for a more compact
+  // result. Note that sorting by address alone ensures that bindings for the
+  // same segment / section are located together.
   llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
-    OutputSegment *segA = a.isec->parent->parent;
-    OutputSegment *segB = b.isec->parent->parent;
-    if (segA != segB)
-      return segA->fileOff < segB->fileOff;
-    OutputSection *osecA = a.isec->parent;
-    OutputSection *osecB = b.isec->parent;
-    if (osecA != osecB)
-      return osecA->addr < osecB->addr;
-    if (a.isec != b.isec)
-      return a.isec->outSecOff < b.isec->outSecOff;
-    return a.offset < b.offset;
+    return a.getVA() < b.getVA();
   });
   for (const BindingEntry &b : bindings) {
-    didEncode = true;
-    encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset,
-                  b.addend, lastBinding, os);
+    if (auto *isec = b.section.dyn_cast<const InputSection *>()) {
+      encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.offset, b.addend,
+                    lastBinding, os);
+    } else {
+      auto *osec = b.section.get<const OutputSection *>();
+      encodeBinding(b.dysym, osec, b.offset, b.addend, lastBinding, os);
+    }
   }
-  if (didEncode)
+  if (!bindings.empty())
     os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
 }
 
@@ -246,9 +228,9 @@ void StubsSection::writeTo(uint8_t *buf) const {
   }
 }
 
-void StubsSection::addEntry(DylibSymbol &sym) {
-  if (entries.insert(&sym))
-    sym.stubsIndex = entries.size() - 1;
+void StubsSection::addEntry(DylibSymbol *sym) {
+  if (entries.insert(sym))
+    sym->stubsIndex = entries.size() - 1;
 }
 
 StubHelperSection::StubHelperSection()
@@ -279,7 +261,7 @@ void StubHelperSection::setup() {
           "Needed to perform lazy binding.");
     return;
   }
-  in.got->addEntry(*stubBinder);
+  in.got->addEntry(stubBinder);
 
   inputSections.push_back(in.imageLoaderCache);
   symtab->addDefined("__dyld_private", in.imageLoaderCache, 0,

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index a900ef287331..9918fbcb432e 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -16,6 +16,7 @@
 #include "OutputSegment.h"
 #include "Target.h"
 
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -113,7 +114,7 @@ class NonLazyPointerSectionBase : public SyntheticSection {
 
   void writeTo(uint8_t *buf) const override;
 
-  void addEntry(Symbol &sym);
+  void addEntry(Symbol *sym);
 
 private:
   llvm::SetVector<const Symbol *> entries;
@@ -136,14 +137,20 @@ class TlvPointerSection : public NonLazyPointerSectionBase {
                                   section_names::threadPtrs) {}
 };
 
+using SectionPointerUnion =
+    llvm::PointerUnion<const InputSection *, const OutputSection *>;
+
 struct BindingEntry {
   const DylibSymbol *dysym;
-  const InputSection *isec;
+  SectionPointerUnion section;
   uint64_t offset;
   int64_t addend;
-  BindingEntry(const DylibSymbol *dysym, const InputSection *isec,
+
+  BindingEntry(const DylibSymbol *dysym, SectionPointerUnion section,
                uint64_t offset, int64_t addend)
-      : dysym(dysym), isec(isec), offset(offset), addend(addend) {}
+      : dysym(dysym), section(section), offset(offset), addend(addend) {}
+
+  uint64_t getVA() const;
 };
 
 // Stores bind opcodes for telling dyld which symbols to load non-lazily.
@@ -156,12 +163,12 @@ class BindingSection : public LinkEditSection {
   // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in
   // section headers.
   bool isHidden() const override { return true; }
-  bool isNeeded() const override;
+  bool isNeeded() const override { return !bindings.empty(); }
   void writeTo(uint8_t *buf) const override;
 
-  void addEntry(const DylibSymbol *dysym, const InputSection *isec,
-                uint64_t offset, int64_t addend) {
-    bindings.emplace_back(dysym, isec, offset, addend);
+  void addEntry(const DylibSymbol *dysym, SectionPointerUnion section,
+                uint64_t offset, int64_t addend = 0) {
+    bindings.emplace_back(dysym, section, offset, addend);
   }
 
 private:
@@ -200,7 +207,7 @@ class StubsSection : public SyntheticSection {
 
   const llvm::SetVector<DylibSymbol *> &getEntries() const { return entries; }
 
-  void addEntry(DylibSymbol &sym);
+  void addEntry(DylibSymbol *sym);
 
 private:
   llvm::SetVector<DylibSymbol *> entries;

diff  --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index cbee6afa6b61..a2efe506864d 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -54,7 +54,7 @@ class TargetInfo {
   // GOT/stubs entries, and resolveSymbolVA() will return the addresses of those
   // entries. resolveSymbolVA() may also relax the target instructions to save
   // on a level of address indirection.
-  virtual void prepareSymbolRelocation(Symbol &, const InputSection *,
+  virtual void prepareSymbolRelocation(Symbol *, const InputSection *,
                                        const Reloc &) = 0;
   virtual uint64_t resolveSymbolVA(uint8_t *buf, const Symbol &,
                                    uint8_t type) const = 0;

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 3593ff692c3d..5a56762450a1 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -314,7 +314,7 @@ void Writer::scanRelocations() {
           error("undefined symbol " + s->getName() + ", referenced from " +
                 sys::path::filename(isec->file->getName()));
         else
-          target->prepareSymbolRelocation(*s, isec, r);
+          target->prepareSymbolRelocation(s, isec, r);
       }
     }
   }

diff  --git a/lld/test/MachO/no-unneeded-dyld-info.s b/lld/test/MachO/no-unneeded-dyld-info.s
new file mode 100644
index 000000000000..11a31594ec0c
--- /dev/null
+++ b/lld/test/MachO/no-unneeded-dyld-info.s
@@ -0,0 +1,19 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: lld -flavor darwinnew -o %t %t.o
+# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s
+
+# CHECK:                 cmd LC_DYLD_INFO_ONLY
+# CHECK-NEXT:        cmdsize 48
+# CHECK-NEXT:     rebase_off 0
+# CHECK-NEXT:    rebase_size 0
+# CHECK-NEXT:       bind_off 0
+# CHECK-NEXT:      bind_size 0
+# CHECK-NEXT:  weak_bind_off 0
+# CHECK-NEXT: weak_bind_size 0
+# CHECK-NEXT:  lazy_bind_off 0
+# CHECK-NEXT: lazy_bind_size 0
+
+.globl _main
+_main:
+  ret


        


More information about the llvm-commits mailing list