[lld] 53eb7fd - [lld-macho] Support binding dysyms to any section
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 2 21:21:33 PDT 2020
Author: Jez Ng
Date: 2020-07-02T21:21:01-07:00
New Revision: 53eb7fda51f27b1b098fd6d5c9385948e891e800
URL: https://github.com/llvm/llvm-project/commit/53eb7fda51f27b1b098fd6d5c9385948e891e800
DIFF: https://github.com/llvm/llvm-project/commit/53eb7fda51f27b1b098fd6d5c9385948e891e800.diff
LOG: [lld-macho] Support binding dysyms to any section
Previously, we only supported binding dysyms to the GOT. This
diff adds support for binding them to any arbitrary section. C++
programs appear to use this, I believe for vtables and type_info.
This diff also makes our bind opcode encoding a bit smarter -- we now
encode just the differences between bindings, which will make things
more compact.
I was initially concerned about the performance overhead of iterating
over these relocations, but it turns out that the number of such
relocations is small. A quick analysis of my llvm-project build
directory showed that < 1.3% out of ~7M relocations are RELOC_UNSIGNED
bindings to symbols (including both dynamic and static symbols).
Reviewed By: #lld-macho, smeenai
Differential Revision: https://reviews.llvm.org/D83103
Added:
Modified:
lld/MachO/Arch/X86_64.cpp
lld/MachO/SyntheticSections.cpp
lld/MachO/SyntheticSections.h
lld/MachO/Target.h
lld/MachO/Writer.cpp
lld/test/MachO/dylink.s
Removed:
################################################################################
diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 3ce65ad4e22f..36f686ca2f1d 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -34,7 +34,8 @@ struct X86_64 : TargetInfo {
void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
uint64_t entryAddr) const override;
- void prepareSymbolRelocation(lld::macho::Symbol &, uint8_t type) override;
+ void prepareSymbolRelocation(lld::macho::Symbol &, const InputSection *,
+ const Reloc &) override;
uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override;
};
@@ -208,8 +209,9 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
in.stubHelper->addr);
}
-void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
- switch (type) {
+void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
+ const InputSection *isec, const Reloc &r) {
+ switch (r.type) {
case X86_64_RELOC_GOT_LOAD:
// TODO: implement mov -> lea relaxation for non-dynamic symbols
case X86_64_RELOC_GOT:
@@ -220,7 +222,17 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
in.stubs->addEntry(*dysym);
break;
}
- case X86_64_RELOC_UNSIGNED:
+ case X86_64_RELOC_UNSIGNED: {
+ if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) {
+ if (r.length != 3) {
+ error("X86_64_RELOC_UNSIGNED referencing the dynamic symbol " +
+ dysym->getName() + " must have r_length = 3");
+ return;
+ }
+ in.binding->addEntry(dysym, isec, r.offset, r.addend);
+ }
+ break;
+ }
case X86_64_RELOC_SIGNED:
case X86_64_RELOC_SIGNED_1:
case X86_64_RELOC_SIGNED_2:
@@ -228,7 +240,7 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
break;
case X86_64_RELOC_SUBTRACTOR:
case X86_64_RELOC_TLV:
- fatal("TODO: handle relocation type " + std::to_string(type));
+ fatal("TODO: handle relocation type " + std::to_string(r.type));
break;
default:
llvm_unreachable("unexpected relocation type");
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index f8f95ce24d41..cc0d5a93c40d 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -11,6 +11,7 @@
#include "ExportTrie.h"
#include "InputFiles.h"
#include "MachOStructs.h"
+#include "MergedOutputSection.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
@@ -95,7 +96,68 @@ void GotSection::writeTo(uint8_t *buf) const {
BindingSection::BindingSection()
: SyntheticSection(segment_names::linkEdit, section_names::binding) {}
-bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
+bool BindingSection::isNeeded() const {
+ return bindings.size() != 0 || in.got->isNeeded();
+}
+
+namespace {
+struct Binding {
+ OutputSegment *segment = nullptr;
+ uint64_t offset = 0;
+ int64_t addend = 0;
+ uint8_t ordinal = 0;
+};
+} // namespace
+
+// Encode a sequence of opcodes that tell dyld to write the address of dysym +
+// addend at osec->addr + outSecOff.
+//
+// The bind opcode "interpreter" remembers the values of each binding field, so
+// we only need to encode the differences between bindings. Hence the use of
+// lastBinding.
+static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
+ uint64_t outSecOff, int64_t addend,
+ Binding &lastBinding, raw_svector_ostream &os) {
+ using namespace llvm::MachO;
+ OutputSegment *seg = osec->parent;
+ uint64_t offset = osec->getSegmentOffset() + outSecOff;
+ if (lastBinding.segment != seg) {
+ os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
+ seg->index);
+ encodeULEB128(offset, os);
+ lastBinding.segment = seg;
+ lastBinding.offset = offset;
+ } else if (lastBinding.offset != offset) {
+ assert(lastBinding.offset <= offset);
+ os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
+ encodeULEB128(offset - lastBinding.offset, os);
+ lastBinding.offset = offset;
+ }
+
+ if (lastBinding.ordinal != dysym.file->ordinal) {
+ if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) {
+ os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
+ dysym.file->ordinal);
+ } else {
+ error("TODO: Support larger dylib symbol ordinals");
+ return;
+ }
+ lastBinding.ordinal = dysym.file->ordinal;
+ }
+
+ if (lastBinding.addend != addend) {
+ os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
+ encodeSLEB128(addend, os);
+ lastBinding.addend = addend;
+ }
+
+ os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
+ << dysym.getName() << '\0'
+ << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
+ << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
+ // DO_BIND causes dyld to both perform the binding and increment the offset
+ lastBinding.offset += WordSize;
+}
// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
// interprets to update a record with the following fields:
@@ -111,44 +173,40 @@ bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
// entry. It does *not* clear the record state after doing the bind, so
// subsequent opcodes only need to encode the differences between bindings.
void BindingSection::finalizeContents() {
- if (!isNeeded())
- return;
-
raw_svector_ostream os{contents};
- os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
- in.got->parent->index);
- encodeULEB128(in.got->getSegmentOffset(), os);
- uint32_t entries_to_skip = 0;
+ Binding lastBinding;
+ bool didEncode = false;
+ size_t gotIdx = 0;
for (const Symbol *sym : in.got->getEntries()) {
if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
- if (entries_to_skip != 0) {
- os << static_cast<uint8_t>(MachO::BIND_OPCODE_ADD_ADDR_ULEB);
- encodeULEB128(WordSize * entries_to_skip, os);
- entries_to_skip = 0;
- }
-
- // TODO: Implement compact encoding -- we only need to encode the
- // differences between consecutive symbol entries.
- if (dysym->file->ordinal <= MachO::BIND_IMMEDIATE_MASK) {
- os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
- dysym->file->ordinal);
- } else {
- error("TODO: Support larger dylib symbol ordinals");
- continue;
- }
- os << static_cast<uint8_t>(
- MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
- << dysym->getName() << '\0'
- << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_TYPE_IMM |
- MachO::BIND_TYPE_POINTER)
- << static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND);
- } else {
- // We have a defined symbol with a pre-populated address; skip over it.
- ++entries_to_skip;
+ didEncode = true;
+ encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os);
}
+ ++gotIdx;
}
- os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
+ // Sorting the relocations by segment and address allows us to encode them
+ // more compactly.
+ llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
+ OutputSegment *segA = a.isec->parent->parent;
+ OutputSegment *segB = b.isec->parent->parent;
+ if (segA != segB)
+ return segA->fileOff < segB->fileOff;
+ OutputSection *osecA = a.isec->parent;
+ OutputSection *osecB = b.isec->parent;
+ if (osecA != osecB)
+ return osecA->addr < osecB->addr;
+ if (a.isec != b.isec)
+ return a.isec->outSecOff < b.isec->outSecOff;
+ return a.offset < b.offset;
+ });
+ for (const BindingEntry &b : bindings) {
+ didEncode = true;
+ encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset,
+ b.addend, lastBinding, os);
+ }
+ if (didEncode)
+ os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
}
void BindingSection::writeTo(uint8_t *buf) const {
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 7033369904f6..a8fbf6c8a265 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -94,6 +94,16 @@ class GotSection : public SyntheticSection {
llvm::SetVector<const Symbol *> entries;
};
+struct BindingEntry {
+ const DylibSymbol *dysym;
+ const InputSection *isec;
+ uint64_t offset;
+ int64_t addend;
+ BindingEntry(const DylibSymbol *dysym, const InputSection *isec,
+ uint64_t offset, int64_t addend)
+ : dysym(dysym), isec(isec), offset(offset), addend(addend) {}
+};
+
// Stores bind opcodes for telling dyld which symbols to load non-lazily.
class BindingSection : public SyntheticSection {
public:
@@ -107,6 +117,13 @@ class BindingSection : public SyntheticSection {
bool isNeeded() const override;
void writeTo(uint8_t *buf) const override;
+ void addEntry(const DylibSymbol *dysym, const InputSection *isec,
+ uint64_t offset, int64_t addend) {
+ bindings.emplace_back(dysym, isec, offset, addend);
+ }
+
+private:
+ std::vector<BindingEntry> bindings;
SmallVector<char, 128> contents;
};
@@ -256,6 +273,7 @@ class SymtabSection : public SyntheticSection {
};
struct InStruct {
+ BindingSection *binding = nullptr;
GotSection *got = nullptr;
LazyPointerSection *lazyPointers = nullptr;
StubsSection *stubs = nullptr;
diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index 7687fcdc66ea..8ea1bde12307 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -53,7 +53,8 @@ class TargetInfo {
// depending on the relocation type. prepareSymbolRelocation() will set up the
// GOT/stubs entries, and getSymbolVA() will return the addresses of those
// entries.
- virtual void prepareSymbolRelocation(Symbol &, uint8_t type) = 0;
+ virtual void prepareSymbolRelocation(Symbol &, const InputSection *,
+ const Reloc &) = 0;
virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0;
uint32_t cpuType;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index c88e314e556d..03000a7f437e 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -54,7 +54,6 @@ class Writer {
uint64_t addr = 0;
uint64_t fileOff = 0;
MachHeaderSection *headerSection = nullptr;
- BindingSection *bindingSection = nullptr;
LazyBindingSection *lazyBindingSection = nullptr;
ExportSection *exportSection = nullptr;
StringTableSection *stringTableSection = nullptr;
@@ -254,7 +253,7 @@ void Writer::scanRelocations() {
error("undefined symbol " + s->getName() + ", referenced from " +
sys::path::filename(isec->file->getName()));
else
- target->prepareSymbolRelocation(*s, r.type);
+ target->prepareSymbolRelocation(*s, isec, r);
}
}
}
@@ -262,7 +261,7 @@ void Writer::scanRelocations() {
void Writer::createLoadCommands() {
headerSection->addLoadCommand(
- make<LCDyldInfo>(bindingSection, lazyBindingSection, exportSection));
+ make<LCDyldInfo>(in.binding, lazyBindingSection, exportSection));
headerSection->addLoadCommand(
make<LCSymtab>(symtabSection, stringTableSection));
headerSection->addLoadCommand(make<LCDysymtab>());
@@ -404,7 +403,6 @@ static void sortSegmentsAndSections() {
void Writer::createOutputSections() {
// First, create hidden sections
headerSection = make<MachHeaderSection>();
- bindingSection = make<BindingSection>();
lazyBindingSection = make<LazyBindingSection>();
stringTableSection = make<StringTableSection>();
symtabSection = make<SymtabSection>(*stringTableSection);
@@ -513,7 +511,7 @@ void Writer::run() {
assignAddresses(seg);
// Fill __LINKEDIT contents.
- bindingSection->finalizeContents();
+ in.binding->finalizeContents();
lazyBindingSection->finalizeContents();
exportSection->finalizeContents();
symtabSection->finalizeContents();
@@ -535,6 +533,7 @@ void Writer::run() {
void macho::writeResult() { Writer().run(); }
void macho::createSyntheticSections() {
+ in.binding = make<BindingSection>();
in.got = make<GotSection>();
in.lazyPointers = make<LazyPointerSection>();
in.stubs = make<StubsSection>();
diff --git a/lld/test/MachO/dylink.s b/lld/test/MachO/dylink.s
index 1909e380fc6a..0549aab98292 100644
--- a/lld/test/MachO/dylink.s
+++ b/lld/test/MachO/dylink.s
@@ -31,9 +31,12 @@
# CHECK-NEXT: [[#%x, GOODBYE_RIP:]]: popq %rsi
# CHECK-LABEL: Bind table:
-# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]] pointer 0 libhello _hello_world
-# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_ITS_ME_RIP + HELLO_ITS_ME_OFF]] pointer 0 libhello _hello_its_me
-# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]] pointer 0 libgoodbye _goodbye_world
+# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]] pointer 0 libhello _hello_world
+# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_ITS_ME_RIP + HELLO_ITS_ME_OFF]] pointer 0 libhello _hello_its_me
+# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]] pointer 0 libgoodbye _goodbye_world
+# CHECK-DAG: __DATA __data 0x[[#%x, DATA_ADDR:]] pointer 0 libhello _hello_world
+# CHECK-DAG: __DATA __data 0x{{0*}}[[#%x, DATA_ADDR + 8]] pointer 8 libhello _hello_its_me
+# CHECK-DAG: __DATA __data 0x{{0*}}[[#%x, DATA_ADDR + 16]] pointer -15 libgoodbye _goodbye_world
.section __TEXT,__text
.globl _main
@@ -59,3 +62,8 @@ _main:
syscall
mov $0, %rax
ret
+
+.data
+.quad _hello_world
+.quad _hello_its_me + 0x8
+.quad _goodbye_world - 0xf
More information about the llvm-commits
mailing list