[lld] 53eb7fd - [lld-macho] Support binding dysyms to any section

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 2 21:21:33 PDT 2020


Author: Jez Ng
Date: 2020-07-02T21:21:01-07:00
New Revision: 53eb7fda51f27b1b098fd6d5c9385948e891e800

URL: https://github.com/llvm/llvm-project/commit/53eb7fda51f27b1b098fd6d5c9385948e891e800
DIFF: https://github.com/llvm/llvm-project/commit/53eb7fda51f27b1b098fd6d5c9385948e891e800.diff

LOG: [lld-macho] Support binding dysyms to any section

Previously, we only supported binding dysyms to the GOT. This
diff adds support for binding them to any arbitrary section. C++
programs appear to use this, I believe for vtables and type_info.

This diff also makes our bind opcode encoding a bit smarter -- we now
encode just the differences between bindings, which will make things
more compact.

I was initially concerned about the performance overhead of iterating
over these relocations, but it turns out that the number of such
relocations is small. A quick analysis of my llvm-project build
directory showed that < 1.3% out of ~7M relocations are RELOC_UNSIGNED
bindings to symbols (including both dynamic and static symbols).

Reviewed By: #lld-macho, smeenai

Differential Revision: https://reviews.llvm.org/D83103

Added: 
    

Modified: 
    lld/MachO/Arch/X86_64.cpp
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/MachO/Target.h
    lld/MachO/Writer.cpp
    lld/test/MachO/dylink.s

Removed: 
    


################################################################################
diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 3ce65ad4e22f..36f686ca2f1d 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -34,7 +34,8 @@ struct X86_64 : TargetInfo {
   void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
                             uint64_t entryAddr) const override;
 
-  void prepareSymbolRelocation(lld::macho::Symbol &, uint8_t type) override;
+  void prepareSymbolRelocation(lld::macho::Symbol &, const InputSection *,
+                               const Reloc &) override;
   uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override;
 };
 
@@ -208,8 +209,9 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
                    in.stubHelper->addr);
 }
 
-void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
-  switch (type) {
+void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
+                                     const InputSection *isec, const Reloc &r) {
+  switch (r.type) {
   case X86_64_RELOC_GOT_LOAD:
     // TODO: implement mov -> lea relaxation for non-dynamic symbols
   case X86_64_RELOC_GOT:
@@ -220,7 +222,17 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
       in.stubs->addEntry(*dysym);
     break;
   }
-  case X86_64_RELOC_UNSIGNED:
+  case X86_64_RELOC_UNSIGNED: {
+    if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) {
+      if (r.length != 3) {
+        error("X86_64_RELOC_UNSIGNED referencing the dynamic symbol " +
+              dysym->getName() + " must have r_length = 3");
+        return;
+      }
+      in.binding->addEntry(dysym, isec, r.offset, r.addend);
+    }
+    break;
+  }
   case X86_64_RELOC_SIGNED:
   case X86_64_RELOC_SIGNED_1:
   case X86_64_RELOC_SIGNED_2:
@@ -228,7 +240,7 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
     break;
   case X86_64_RELOC_SUBTRACTOR:
   case X86_64_RELOC_TLV:
-    fatal("TODO: handle relocation type " + std::to_string(type));
+    fatal("TODO: handle relocation type " + std::to_string(r.type));
     break;
   default:
     llvm_unreachable("unexpected relocation type");

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index f8f95ce24d41..cc0d5a93c40d 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -11,6 +11,7 @@
 #include "ExportTrie.h"
 #include "InputFiles.h"
 #include "MachOStructs.h"
+#include "MergedOutputSection.h"
 #include "OutputSegment.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
@@ -95,7 +96,68 @@ void GotSection::writeTo(uint8_t *buf) const {
 BindingSection::BindingSection()
     : SyntheticSection(segment_names::linkEdit, section_names::binding) {}
 
-bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
+bool BindingSection::isNeeded() const {
+  return bindings.size() != 0 || in.got->isNeeded();
+}
+
+namespace {
+struct Binding {
+  OutputSegment *segment = nullptr;
+  uint64_t offset = 0;
+  int64_t addend = 0;
+  uint8_t ordinal = 0;
+};
+} // namespace
+
+// Encode a sequence of opcodes that tell dyld to write the address of dysym +
+// addend at osec->addr + outSecOff.
+//
+// The bind opcode "interpreter" remembers the values of each binding field, so
+// we only need to encode the differences between bindings. Hence the use of
+// lastBinding.
+static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
+                          uint64_t outSecOff, int64_t addend,
+                          Binding &lastBinding, raw_svector_ostream &os) {
+  using namespace llvm::MachO;
+  OutputSegment *seg = osec->parent;
+  uint64_t offset = osec->getSegmentOffset() + outSecOff;
+  if (lastBinding.segment != seg) {
+    os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
+                               seg->index);
+    encodeULEB128(offset, os);
+    lastBinding.segment = seg;
+    lastBinding.offset = offset;
+  } else if (lastBinding.offset != offset) {
+    assert(lastBinding.offset <= offset);
+    os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
+    encodeULEB128(offset - lastBinding.offset, os);
+    lastBinding.offset = offset;
+  }
+
+  if (lastBinding.ordinal != dysym.file->ordinal) {
+    if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) {
+      os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
+                                 dysym.file->ordinal);
+    } else {
+      error("TODO: Support larger dylib symbol ordinals");
+      return;
+    }
+    lastBinding.ordinal = dysym.file->ordinal;
+  }
+
+  if (lastBinding.addend != addend) {
+    os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
+    encodeSLEB128(addend, os);
+    lastBinding.addend = addend;
+  }
+
+  os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
+     << dysym.getName() << '\0'
+     << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
+     << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
+  // DO_BIND causes dyld to both perform the binding and increment the offset
+  lastBinding.offset += WordSize;
+}
 
 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
 // interprets to update a record with the following fields:
@@ -111,44 +173,40 @@ bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
 // entry. It does *not* clear the record state after doing the bind, so
 // subsequent opcodes only need to encode the differences between bindings.
 void BindingSection::finalizeContents() {
-  if (!isNeeded())
-    return;
-
   raw_svector_ostream os{contents};
-  os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
-                             in.got->parent->index);
-  encodeULEB128(in.got->getSegmentOffset(), os);
-  uint32_t entries_to_skip = 0;
+  Binding lastBinding;
+  bool didEncode = false;
+  size_t gotIdx = 0;
   for (const Symbol *sym : in.got->getEntries()) {
     if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
-      if (entries_to_skip != 0) {
-        os << static_cast<uint8_t>(MachO::BIND_OPCODE_ADD_ADDR_ULEB);
-        encodeULEB128(WordSize * entries_to_skip, os);
-        entries_to_skip = 0;
-      }
-
-      // TODO: Implement compact encoding -- we only need to encode the
-      // differences between consecutive symbol entries.
-      if (dysym->file->ordinal <= MachO::BIND_IMMEDIATE_MASK) {
-        os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
-                                   dysym->file->ordinal);
-      } else {
-        error("TODO: Support larger dylib symbol ordinals");
-        continue;
-      }
-      os << static_cast<uint8_t>(
-                MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
-         << dysym->getName() << '\0'
-         << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_TYPE_IMM |
-                                 MachO::BIND_TYPE_POINTER)
-         << static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND);
-    } else {
-      // We have a defined symbol with a pre-populated address; skip over it.
-      ++entries_to_skip;
+      didEncode = true;
+      encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os);
     }
+    ++gotIdx;
   }
 
-  os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
+  // Sorting the relocations by segment and address allows us to encode them
+  // more compactly.
+  llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
+    OutputSegment *segA = a.isec->parent->parent;
+    OutputSegment *segB = b.isec->parent->parent;
+    if (segA != segB)
+      return segA->fileOff < segB->fileOff;
+    OutputSection *osecA = a.isec->parent;
+    OutputSection *osecB = b.isec->parent;
+    if (osecA != osecB)
+      return osecA->addr < osecB->addr;
+    if (a.isec != b.isec)
+      return a.isec->outSecOff < b.isec->outSecOff;
+    return a.offset < b.offset;
+  });
+  for (const BindingEntry &b : bindings) {
+    didEncode = true;
+    encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset,
+                  b.addend, lastBinding, os);
+  }
+  if (didEncode)
+    os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
 }
 
 void BindingSection::writeTo(uint8_t *buf) const {

diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 7033369904f6..a8fbf6c8a265 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -94,6 +94,16 @@ class GotSection : public SyntheticSection {
   llvm::SetVector<const Symbol *> entries;
 };
 
+struct BindingEntry {
+  const DylibSymbol *dysym;
+  const InputSection *isec;
+  uint64_t offset;
+  int64_t addend;
+  BindingEntry(const DylibSymbol *dysym, const InputSection *isec,
+               uint64_t offset, int64_t addend)
+      : dysym(dysym), isec(isec), offset(offset), addend(addend) {}
+};
+
 // Stores bind opcodes for telling dyld which symbols to load non-lazily.
 class BindingSection : public SyntheticSection {
 public:
@@ -107,6 +117,13 @@ class BindingSection : public SyntheticSection {
   bool isNeeded() const override;
   void writeTo(uint8_t *buf) const override;
 
+  void addEntry(const DylibSymbol *dysym, const InputSection *isec,
+                uint64_t offset, int64_t addend) {
+    bindings.emplace_back(dysym, isec, offset, addend);
+  }
+
+private:
+  std::vector<BindingEntry> bindings;
   SmallVector<char, 128> contents;
 };
 
@@ -256,6 +273,7 @@ class SymtabSection : public SyntheticSection {
 };
 
 struct InStruct {
+  BindingSection *binding = nullptr;
   GotSection *got = nullptr;
   LazyPointerSection *lazyPointers = nullptr;
   StubsSection *stubs = nullptr;

diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index 7687fcdc66ea..8ea1bde12307 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -53,7 +53,8 @@ class TargetInfo {
   // depending on the relocation type. prepareSymbolRelocation() will set up the
   // GOT/stubs entries, and getSymbolVA() will return the addresses of those
   // entries.
-  virtual void prepareSymbolRelocation(Symbol &, uint8_t type) = 0;
+  virtual void prepareSymbolRelocation(Symbol &, const InputSection *,
+                                       const Reloc &) = 0;
   virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0;
 
   uint32_t cpuType;

diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index c88e314e556d..03000a7f437e 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -54,7 +54,6 @@ class Writer {
   uint64_t addr = 0;
   uint64_t fileOff = 0;
   MachHeaderSection *headerSection = nullptr;
-  BindingSection *bindingSection = nullptr;
   LazyBindingSection *lazyBindingSection = nullptr;
   ExportSection *exportSection = nullptr;
   StringTableSection *stringTableSection = nullptr;
@@ -254,7 +253,7 @@ void Writer::scanRelocations() {
           error("undefined symbol " + s->getName() + ", referenced from " +
                 sys::path::filename(isec->file->getName()));
         else
-          target->prepareSymbolRelocation(*s, r.type);
+          target->prepareSymbolRelocation(*s, isec, r);
       }
     }
   }
@@ -262,7 +261,7 @@ void Writer::scanRelocations() {
 
 void Writer::createLoadCommands() {
   headerSection->addLoadCommand(
-      make<LCDyldInfo>(bindingSection, lazyBindingSection, exportSection));
+      make<LCDyldInfo>(in.binding, lazyBindingSection, exportSection));
   headerSection->addLoadCommand(
       make<LCSymtab>(symtabSection, stringTableSection));
   headerSection->addLoadCommand(make<LCDysymtab>());
@@ -404,7 +403,6 @@ static void sortSegmentsAndSections() {
 void Writer::createOutputSections() {
   // First, create hidden sections
   headerSection = make<MachHeaderSection>();
-  bindingSection = make<BindingSection>();
   lazyBindingSection = make<LazyBindingSection>();
   stringTableSection = make<StringTableSection>();
   symtabSection = make<SymtabSection>(*stringTableSection);
@@ -513,7 +511,7 @@ void Writer::run() {
       assignAddresses(seg);
 
   // Fill __LINKEDIT contents.
-  bindingSection->finalizeContents();
+  in.binding->finalizeContents();
   lazyBindingSection->finalizeContents();
   exportSection->finalizeContents();
   symtabSection->finalizeContents();
@@ -535,6 +533,7 @@ void Writer::run() {
 void macho::writeResult() { Writer().run(); }
 
 void macho::createSyntheticSections() {
+  in.binding = make<BindingSection>();
   in.got = make<GotSection>();
   in.lazyPointers = make<LazyPointerSection>();
   in.stubs = make<StubsSection>();

diff --git a/lld/test/MachO/dylink.s b/lld/test/MachO/dylink.s
index 1909e380fc6a..0549aab98292 100644
--- a/lld/test/MachO/dylink.s
+++ b/lld/test/MachO/dylink.s
@@ -31,9 +31,12 @@
 # CHECK-NEXT: [[#%x, GOODBYE_RIP:]]: popq %rsi
 
 # CHECK-LABEL: Bind table:
-# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]]               pointer 0 libhello   _hello_world
-# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_ITS_ME_RIP + HELLO_ITS_ME_OFF]] pointer 0 libhello   _hello_its_me
-# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]]           pointer 0 libgoodbye _goodbye_world
+# CHECK-DAG: __DATA_CONST __got  0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]]               pointer 0   libhello   _hello_world
+# CHECK-DAG: __DATA_CONST __got  0x{{0*}}[[#%x, HELLO_ITS_ME_RIP + HELLO_ITS_ME_OFF]] pointer 0   libhello   _hello_its_me
+# CHECK-DAG: __DATA_CONST __got  0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]]           pointer 0   libgoodbye _goodbye_world
+# CHECK-DAG: __DATA       __data 0x[[#%x, DATA_ADDR:]]                                pointer 0   libhello   _hello_world
+# CHECK-DAG: __DATA       __data 0x{{0*}}[[#%x, DATA_ADDR + 8]]                       pointer 8   libhello   _hello_its_me
+# CHECK-DAG: __DATA       __data 0x{{0*}}[[#%x, DATA_ADDR + 16]]                      pointer -15 libgoodbye _goodbye_world
 
 .section __TEXT,__text
 .globl _main
@@ -59,3 +62,8 @@ _main:
   syscall
   mov $0, %rax
   ret
+
+.data
+.quad _hello_world
+.quad _hello_its_me + 0x8
+.quad _goodbye_world - 0xf


        


More information about the llvm-commits mailing list