[lld] a12e7d4 - [lld-macho] Handle GOT relocations of non-dylib symbols
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 17 20:41:56 PDT 2020
Author: Jez Ng
Date: 2020-06-17T20:41:28-07:00
New Revision: a12e7d406de2c31633414e5c73881869c930d194
URL: https://github.com/llvm/llvm-project/commit/a12e7d406de2c31633414e5c73881869c930d194
DIFF: https://github.com/llvm/llvm-project/commit/a12e7d406de2c31633414e5c73881869c930d194.diff
LOG: [lld-macho] Handle GOT relocations of non-dylib symbols
Summary:
Turns out this case is actually really common -- it happens whenever there's
a reference to an `extern` variable that ends up statically linked.
Depends on D80856.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Reviewed By: smeenai
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80857
Added:
lld/test/MachO/local-got.s
Modified:
lld/MachO/Arch/X86_64.cpp
lld/MachO/InputSection.cpp
lld/MachO/Symbols.h
lld/MachO/SyntheticSections.cpp
lld/MachO/SyntheticSections.h
lld/MachO/Target.h
lld/MachO/Writer.cpp
Removed:
################################################################################
diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 6353cc241014..3ce65ad4e22f 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -34,8 +34,8 @@ struct X86_64 : TargetInfo {
void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
uint64_t entryAddr) const override;
- void prepareDylibSymbolRelocation(DylibSymbol &, uint8_t type) override;
- uint64_t getDylibSymbolVA(const DylibSymbol &, uint8_t type) const override;
+ void prepareSymbolRelocation(lld::macho::Symbol &, uint8_t type) override;
+ uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override;
};
} // namespace
@@ -208,30 +208,54 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
in.stubHelper->addr);
}
-void X86_64::prepareDylibSymbolRelocation(DylibSymbol &sym, uint8_t type) {
+void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
switch (type) {
case X86_64_RELOC_GOT_LOAD:
// TODO: implement mov -> lea relaxation for non-dynamic symbols
case X86_64_RELOC_GOT:
in.got->addEntry(sym);
break;
- case X86_64_RELOC_BRANCH:
- in.stubs->addEntry(sym);
+ case X86_64_RELOC_BRANCH: {
+ if (auto *dysym = dyn_cast<DylibSymbol>(&sym))
+ in.stubs->addEntry(*dysym);
+ break;
+ }
+ case X86_64_RELOC_UNSIGNED:
+ case X86_64_RELOC_SIGNED:
+ case X86_64_RELOC_SIGNED_1:
+ case X86_64_RELOC_SIGNED_2:
+ case X86_64_RELOC_SIGNED_4:
+ break;
+ case X86_64_RELOC_SUBTRACTOR:
+ case X86_64_RELOC_TLV:
+ fatal("TODO: handle relocation type " + std::to_string(type));
break;
default:
- llvm_unreachable("Unexpected dylib relocation type");
+ llvm_unreachable("unexpected relocation type");
}
}
-uint64_t X86_64::getDylibSymbolVA(const DylibSymbol &sym, uint8_t type) const {
+uint64_t X86_64::getSymbolVA(const lld::macho::Symbol &sym,
+ uint8_t type) const {
switch (type) {
case X86_64_RELOC_GOT_LOAD:
case X86_64_RELOC_GOT:
return in.got->addr + sym.gotIndex * WordSize;
case X86_64_RELOC_BRANCH:
- return in.stubs->addr + sym.stubsIndex * sizeof(stub);
+ if (auto *dysym = dyn_cast<DylibSymbol>(&sym))
+ return in.stubs->addr + dysym->stubsIndex * sizeof(stub);
+ return sym.getVA();
+ case X86_64_RELOC_UNSIGNED:
+ case X86_64_RELOC_SIGNED:
+ case X86_64_RELOC_SIGNED_1:
+ case X86_64_RELOC_SIGNED_2:
+ case X86_64_RELOC_SIGNED_4:
+ return sym.getVA();
+ case X86_64_RELOC_SUBTRACTOR:
+ case X86_64_RELOC_TLV:
+ fatal("TODO: handle relocation type " + std::to_string(type));
default:
- llvm_unreachable("Unexpected dylib relocation type");
+ llvm_unreachable("Unexpected relocation type");
}
}
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 3dce2aeb0139..327fd419b613 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -33,15 +33,10 @@ void InputSection::writeTo(uint8_t *buf) {
for (Reloc &r : relocs) {
uint64_t va = 0;
- if (auto *s = r.target.dyn_cast<Symbol *>()) {
- if (auto *dylibSymbol = dyn_cast<DylibSymbol>(s)) {
- va = target->getDylibSymbolVA(*dylibSymbol, r.type);
- } else {
- va = s->getVA();
- }
- } else if (auto *isec = r.target.dyn_cast<InputSection *>()) {
+ if (auto *s = r.target.dyn_cast<Symbol *>())
+ va = target->getSymbolVA(*s, r.type);
+ else if (auto *isec = r.target.dyn_cast<InputSection *>())
va = isec->getVA();
- }
uint64_t val = va + r.addend;
if (r.pcrel)
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 1fe7697289ba..63748ee48324 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -47,6 +47,8 @@ class Symbol {
uint64_t getFileOffset() const;
+ uint32_t gotIndex = UINT32_MAX;
+
protected:
Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {}
@@ -80,7 +82,6 @@ class DylibSymbol : public Symbol {
static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
DylibFile *file;
- uint32_t gotIndex = UINT32_MAX;
uint32_t stubsIndex = UINT32_MAX;
uint32_t lazyBindOffset = UINT32_MAX;
};
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 16fdbb5cc008..7237aa65331e 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -80,12 +80,18 @@ GotSection::GotSection()
// table, which we do not currently emit
}
-void GotSection::addEntry(DylibSymbol &sym) {
+void GotSection::addEntry(Symbol &sym) {
if (entries.insert(&sym)) {
sym.gotIndex = entries.size() - 1;
}
}
+void GotSection::writeTo(uint8_t *buf) const {
+ for (size_t i = 0, n = entries.size(); i < n; ++i)
+ if (auto *defined = dyn_cast<Defined>(entries[i]))
+ write64le(&buf[i * WordSize], defined->getVA());
+}
+
BindingSection::BindingSection()
: SyntheticSection(segment_names::linkEdit, section_names::binding) {}
@@ -112,20 +118,32 @@ void BindingSection::finalizeContents() {
os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
in.got->parent->index);
encodeULEB128(in.got->getSegmentOffset(), os);
- for (const DylibSymbol *sym : in.got->getEntries()) {
- // TODO: Implement compact encoding -- we only need to encode the
- //
diff erences between consecutive symbol entries.
- if (sym->file->ordinal <= BIND_IMMEDIATE_MASK) {
- os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
- sym->file->ordinal);
+ uint32_t entries_to_skip = 0;
+ for (const Symbol *sym : in.got->getEntries()) {
+ if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
+ if (entries_to_skip != 0) {
+ os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
+ encodeULEB128(WordSize * entries_to_skip, os);
+ entries_to_skip = 0;
+ }
+
+ // TODO: Implement compact encoding -- we only need to encode the
+ //
diff erences between consecutive symbol entries.
+ if (dysym->file->ordinal <= BIND_IMMEDIATE_MASK) {
+ os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
+ dysym->file->ordinal);
+ } else {
+ error("TODO: Support larger dylib symbol ordinals");
+ continue;
+ }
+ os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
+ << dysym->getName() << '\0'
+ << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
+ << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
} else {
- error("TODO: Support larger dylib symbol ordinals");
- continue;
+ // We have a defined symbol with a pre-populated address; skip over it.
+ ++entries_to_skip;
}
- os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
- << sym->getName() << '\0'
- << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
- << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
}
os << static_cast<uint8_t>(BIND_OPCODE_DONE);
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index c95f1556e918..95452538c445 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -78,23 +78,18 @@ class GotSection : public SyntheticSection {
public:
GotSection();
- const llvm::SetVector<const DylibSymbol *> &getEntries() const {
- return entries;
- }
+ const llvm::SetVector<const Symbol *> &getEntries() const { return entries; }
bool isNeeded() const override { return !entries.empty(); }
uint64_t getSize() const override { return entries.size() * WordSize; }
- void writeTo(uint8_t *buf) const override {
- // Nothing to write, GOT contains all zeros at link time; it's populated at
- // runtime by dyld.
- }
+ void writeTo(uint8_t *buf) const override;
- void addEntry(DylibSymbol &sym);
+ void addEntry(Symbol &sym);
private:
- llvm::SetVector<const DylibSymbol *> entries;
+ llvm::SetVector<const Symbol *> entries;
};
// Stores bind opcodes for telling dyld which symbols to load non-lazily.
diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index 74df6a951a90..7687fcdc66ea 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -18,6 +18,7 @@
namespace lld {
namespace macho {
+class Symbol;
class DylibSymbol;
class InputSection;
struct Reloc;
@@ -48,13 +49,12 @@ class TargetInfo {
virtual void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
uint64_t entryAddr) const = 0;
- // Dylib symbols are referenced via either the GOT or the stubs section,
- // depending on the relocation type. prepareDylibSymbolRelocation() will set
- // up the GOT/stubs entries, and getDylibSymbolVA() will return the addresses
- // of those entries.
- virtual void prepareDylibSymbolRelocation(DylibSymbol &, uint8_t type) = 0;
- virtual uint64_t getDylibSymbolVA(const DylibSymbol &,
- uint8_t type) const = 0;
+ // Symbols may be referenced via either the GOT or the stubs section,
+ // depending on the relocation type. prepareSymbolRelocation() will set up the
+ // GOT/stubs entries, and getSymbolVA() will return the addresses of those
+ // entries.
+ virtual void prepareSymbolRelocation(Symbol &, uint8_t type) = 0;
+ virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0;
uint32_t cpuType;
uint32_t cpuSubtype;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 96ede13a2b64..def82b578774 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -255,8 +255,8 @@ void Writer::scanRelocations() {
if (isa<Undefined>(s))
error("undefined symbol " + s->getName() + ", referenced from " +
sys::path::filename(isec->file->getName()));
- else if (auto *dylibSymbol = dyn_cast<DylibSymbol>(s))
- target->prepareDylibSymbolRelocation(*dylibSymbol, r.type);
+ else
+ target->prepareSymbolRelocation(*s, r.type);
}
}
}
diff --git a/lld/test/MachO/local-got.s b/lld/test/MachO/local-got.s
new file mode 100644
index 000000000000..6099a6bf18c7
--- /dev/null
+++ b/lld/test/MachO/local-got.s
@@ -0,0 +1,58 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libhello.s \
+# RUN: -o %t/libhello.o
+# RUN: lld -flavor darwinnew -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -dylib -install_name \
+# RUN: @executable_path/libhello.dylib %t/libhello.o -o %t/libhello.dylib
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
+# RUN: lld -flavor darwinnew -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/test %t/test.o -L%t -lhello
+# RUN: llvm-objdump --full-contents --bind %t/test | FileCheck %s --match-full-lines
+
+## Check that the GOT references the cstrings. --full-contents displays the
+## address offset and the contents at that address very similarly, so am using
+## --match-full-lines to make sure we match on the right thing.
+# CHECK: Contents of section __cstring:
+# CHECK-NEXT: 1000003cc {{.*}}
+
+## 1st 8 bytes refer to the start of __cstring + 0xe, 2nd 8 bytes refer to the
+## start of __cstring
+# CHECK: Contents of section __got:
+# CHECK-NEXT: [[#%X,ADDR:]] da030000 01000000 cc030000 01000000 {{.*}}
+# CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}}
+
+## Check that a non-locally-defined symbol is still bound at the correct offset:
+# CHECK: Bind table:
+# CHECK-NEXT: segment section address type addend dylib symbol
+# CHECK-NEXT: __DATA_CONST __got 0x[[#ADDR+16]] pointer 0 libhello _hello_its_me
+
+.globl _main
+
+.text
+_main:
+ movl $0x2000004, %eax # write() syscall
+ mov $1, %rdi # stdout
+ movq _hello_its_me at GOTPCREL(%rip), %rsi
+ mov $15, %rdx # length of str
+ syscall
+
+ movl $0x2000004, %eax # write() syscall
+ mov $1, %rdi # stdout
+ movq _hello_world at GOTPCREL(%rip), %rsi
+ mov $13, %rdx # length of str
+ syscall
+
+ movl $0x2000004, %eax # write() syscall
+ mov $1, %rdi # stdout
+ movq _goodbye_world at GOTPCREL(%rip), %rsi
+ mov $15, %rdx # length of str
+ syscall
+
+ mov $0, %rax
+ ret
+
+.section __TEXT,__cstring
+_hello_world:
+ .asciz "Hello world!\n"
+
+_goodbye_world:
+ .asciz "Goodbye world!\n"
More information about the llvm-commits
mailing list