[lld] d8283d9 - [lld-macho][nfc] Give every SyntheticSection a fake InputSection
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 12 14:26:43 PST 2021
Author: Jez Ng
Date: 2021-03-12T17:26:27-05:00
New Revision: d8283d9ddcc5e9ffda7e72ae122df279b2c5ee17
URL: https://github.com/llvm/llvm-project/commit/d8283d9ddcc5e9ffda7e72ae122df279b2c5ee17
DIFF: https://github.com/llvm/llvm-project/commit/d8283d9ddcc5e9ffda7e72ae122df279b2c5ee17.diff
LOG: [lld-macho][nfc] Give every SyntheticSection a fake InputSection
Previously, it was difficult to write code that handled both synthetic
and regular sections generically. We solve this problem by creating a
fake InputSection at the start of every SyntheticSection.
This refactor allows us to handle DSOHandle like a regular Defined
symbol (since Defined symbols must be attached to an InputSection), and
paves the way for supporting `__mh_*header` symbols. Additionally, it
simplifies our binding/rebase code.
I did have to extend Defined a little -- it now has a `linkerInternal`
flag, to indicate that `___dso_handle` should not be in the final symbol
table.
I've also added some additional testing for `___dso_handle`.
Reviewed By: #lld-macho, oontvoo
Differential Revision: https://reviews.llvm.org/D98545
Added:
Modified:
lld/MachO/Driver.cpp
lld/MachO/SymbolTable.cpp
lld/MachO/SymbolTable.h
lld/MachO/Symbols.cpp
lld/MachO/Symbols.h
lld/MachO/SyntheticSections.cpp
lld/MachO/SyntheticSections.h
lld/test/MachO/dso-handle.s
lld/test/MachO/invalid/dso-handle-duplicate.s
Removed:
################################################################################
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 84a96f7799b6..3674cd927ffa 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1031,7 +1031,14 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
}
createSyntheticSections();
- symtab->addDSOHandle(in.header);
+
+ // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
+ // which does e.g. cleanup of static global variables. The ABI document says
+ // that the pointer can point to any address in one of the dylib's segments,
+ // but in practice ld64 seems to set it to point to the header, so that's
+ // what's implemented here.
+ symtab->addSynthetic("___dso_handle", in.header->isec, 0,
+ /*privateExtern=*/true, /*linkerInternal=*/true);
for (const Arg *arg : args.filtered(OPT_sectcreate)) {
StringRef segName = arg->getValue(0);
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 1a772ba811ca..61a19b0af427 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -37,9 +37,9 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
return {sym, true};
}
-Symbol *SymbolTable::addDefined(StringRef name, InputFile *file,
- InputSection *isec, uint32_t value,
- bool isWeakDef, bool isPrivateExtern) {
+Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
+ InputSection *isec, uint32_t value,
+ bool isWeakDef, bool isPrivateExtern) {
Symbol *s;
bool wasInserted;
bool overridesWeakDef = false;
@@ -52,7 +52,7 @@ Symbol *SymbolTable::addDefined(StringRef name, InputFile *file,
// If one of them isn't private extern, the merged symbol isn't.
if (defined->isWeakDef())
defined->privateExtern &= isPrivateExtern;
- return s;
+ return defined;
}
if (!defined->isWeakDef()) {
error("duplicate symbol: " + name + "\n>>> defined in " +
@@ -70,7 +70,7 @@ Symbol *SymbolTable::addDefined(StringRef name, InputFile *file,
replaceSymbol<Defined>(s, name, file, isec, value, isWeakDef,
/*isExternal=*/true, isPrivateExtern);
defined->overridesWeakDef = overridesWeakDef;
- return s;
+ return defined;
}
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
@@ -158,18 +158,12 @@ Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
return s;
}
-Symbol *SymbolTable::addDSOHandle(const MachHeaderSection *header) {
- Symbol *s;
- bool wasInserted;
- std::tie(s, wasInserted) = insert(DSOHandle::name);
- if (!wasInserted) {
- // FIXME: Make every symbol (including absolute symbols) contain a
- // reference to their originating file, then add that file name to this
- // error message. dynamic_lookup symbols don't have an originating file.
- if (isa<Defined>(s))
- error("found defined symbol with illegal name " + DSOHandle::name);
- }
- replaceSymbol<DSOHandle>(s, header);
+Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec,
+ uint32_t value, bool isPrivateExtern,
+ bool isLinkerInternal) {
+ Defined *s = addDefined(name, nullptr, isec, value, /*isWeakDef=*/false,
+ isPrivateExtern);
+ s->linkerInternal = isLinkerInternal;
return s;
}
diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index 38e21617c0aa..661e6545e7a6 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -24,6 +24,7 @@ class ObjFile;
class InputSection;
class MachHeaderSection;
class Symbol;
+class Defined;
class Undefined;
/*
@@ -34,8 +35,8 @@ class Undefined;
*/
class SymbolTable {
public:
- Symbol *addDefined(StringRef name, InputFile *, InputSection *,
- uint32_t value, bool isWeakDef, bool isPrivateExtern);
+ Defined *addDefined(StringRef name, InputFile *, InputSection *,
+ uint32_t value, bool isWeakDef, bool isPrivateExtern);
Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef);
@@ -48,7 +49,8 @@ class SymbolTable {
Symbol *addLazy(StringRef name, ArchiveFile *file,
const llvm::object::Archive::Symbol &sym);
- Symbol *addDSOHandle(const MachHeaderSection *);
+ Defined *addSynthetic(StringRef name, InputSection *, uint32_t value,
+ bool isPrivateExtern, bool isLinkerInternal);
ArrayRef<Symbol *> getSymbols() const { return symVector; }
Symbol *find(StringRef name);
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 69ca1f6bb888..dd55be748442 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -45,9 +45,3 @@ uint64_t Defined::getFileOffset() const {
}
void LazySymbol::fetchArchiveMember() { getFile()->fetch(sym); }
-
-uint64_t DSOHandle::getVA() const { return header->addr; }
-
-uint64_t DSOHandle::getFileOffset() const { return header->fileOff; }
-
-constexpr StringRef DSOHandle::name;
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index dfd0378dea19..ada5bc164c82 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -39,7 +39,6 @@ class Symbol {
CommonKind,
DylibKind,
LazyKind,
- DSOHandleKind,
};
virtual ~Symbol() {}
@@ -100,7 +99,7 @@ class Defined : public Symbol {
bool isWeakDef, bool isExternal, bool isPrivateExtern)
: Symbol(DefinedKind, name, file), isec(isec), value(value),
overridesWeakDef(false), privateExtern(isPrivateExtern),
- weakDef(isWeakDef), external(isExternal) {}
+ linkerInternal(false), weakDef(isWeakDef), external(isExternal) {}
bool isWeakDef() const override { return weakDef; }
bool isExternalWeakDef() const {
@@ -123,7 +122,10 @@ class Defined : public Symbol {
uint32_t value;
bool overridesWeakDef : 1;
+ // Whether this symbol should appear in the output binary's export trie.
bool privateExtern : 1;
+ // Whether this symbol should appear in the output binary's symbol table.
+ bool linkerInternal : 1;
private:
const bool weakDef : 1;
@@ -228,44 +230,12 @@ class LazySymbol : public Symbol {
const llvm::object::Archive::Symbol sym;
};
-// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which
-// does e.g. cleanup of static global variables. The ABI document says that the
-// pointer can point to any address in one of the dylib's segments, but in
-// practice ld64 seems to set it to point to the header, so that's what's
-// implemented here.
-//
-// The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet
-// tested this on an ARM platform.
-//
-// DSOHandle effectively functions like a Defined symbol, but it doesn't belong
-// to an InputSection.
-class DSOHandle : public Symbol {
-public:
- DSOHandle(const MachHeaderSection *header)
- : Symbol(DSOHandleKind, name, nullptr), header(header) {}
-
- const MachHeaderSection *header;
-
- uint64_t getVA() const override;
-
- uint64_t getFileOffset() const override;
-
- bool isWeakDef() const override { return false; }
-
- bool isTlv() const override { return false; }
-
- static constexpr StringRef name = "___dso_handle";
-
- static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; }
-};
-
union SymbolUnion {
alignas(Defined) char a[sizeof(Defined)];
alignas(Undefined) char b[sizeof(Undefined)];
alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
alignas(LazySymbol) char e[sizeof(LazySymbol)];
- alignas(DSOHandle) char f[sizeof(DSOHandle)];
};
template <typename T, typename... ArgT>
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index f679e1d7c6c6..66dbfd347a6d 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -42,6 +42,11 @@ std::vector<SyntheticSection *> macho::syntheticSections;
SyntheticSection::SyntheticSection(const char *segname, const char *name)
: OutputSection(SyntheticKind, name), segname(segname) {
+ isec = make<InputSection>();
+ isec->segname = segname;
+ isec->name = name;
+ isec->parent = this;
+ isec->outSecOff = 0;
syntheticSections.push_back(this);
}
@@ -118,12 +123,6 @@ void MachHeaderSection::writeTo(uint8_t *buf) const {
PageZeroSection::PageZeroSection()
: SyntheticSection(segment_names::pageZero, section_names::pageZero) {}
-uint64_t Location::getVA() const {
- if (const auto *isec = section.dyn_cast<const InputSection *>())
- return isec->getVA() + offset;
- return section.get<const OutputSection *>()->addr + offset;
-}
-
RebaseSection::RebaseSection()
: LinkEditSection(segment_names::linkEdit, section_names::rebase) {}
@@ -186,16 +185,11 @@ void RebaseSection::finalizeContents() {
os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER);
llvm::sort(locations, [](const Location &a, const Location &b) {
- return a.getVA() < b.getVA();
+ return a.isec->getVA() < b.isec->getVA();
});
- for (const Location &loc : locations) {
- if (const auto *isec = loc.section.dyn_cast<const InputSection *>()) {
- encodeRebase(isec->parent, isec->outSecOff + loc.offset, lastRebase, os);
- } else {
- const auto *osec = loc.section.get<const OutputSection *>();
- encodeRebase(osec, loc.offset, lastRebase, os);
- }
- }
+ for (const Location &loc : locations)
+ encodeRebase(loc.isec->parent, loc.isec->outSecOff + loc.offset, lastRebase,
+ os);
if (lastRebase.consecutiveCount != 0)
encodeDoRebase(lastRebase, os);
@@ -218,7 +212,7 @@ void NonLazyPointerSectionBase::addEntry(Symbol *sym) {
assert(!sym->isInGot());
sym->gotIndex = entries.size() - 1;
- addNonLazyBindingEntries(sym, this, sym->gotIndex * WordSize);
+ addNonLazyBindingEntries(sym, isec, sym->gotIndex * WordSize);
}
}
@@ -336,14 +330,9 @@ void BindingSection::finalizeContents() {
encodeDylibOrdinal(ordinal, os);
lastBinding.ordinal = ordinal;
}
- if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) {
- encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset,
- b.addend, /*isWeakBinding=*/false, lastBinding, os);
- } else {
- auto *osec = b.target.section.get<const OutputSection *>();
- encodeBinding(b.dysym, osec, b.target.offset, b.addend,
- /*isWeakBinding=*/false, lastBinding, os);
- }
+ encodeBinding(b.dysym, b.target.isec->parent,
+ b.target.isec->outSecOff + b.target.offset, b.addend,
+ /*isWeakBinding=*/false, lastBinding, os);
}
if (!bindings.empty())
os << static_cast<uint8_t>(BIND_OPCODE_DONE);
@@ -369,16 +358,10 @@ void WeakBindingSection::finalizeContents() {
[](const WeakBindingEntry &a, const WeakBindingEntry &b) {
return a.target.getVA() < b.target.getVA();
});
- for (const WeakBindingEntry &b : bindings) {
- if (const auto *isec = b.target.section.dyn_cast<const InputSection *>()) {
- encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset,
- b.addend, /*isWeakBinding=*/true, lastBinding, os);
- } else {
- const auto *osec = b.target.section.get<const OutputSection *>();
- encodeBinding(b.symbol, osec, b.target.offset, b.addend,
- /*isWeakBinding=*/true, lastBinding, os);
- }
- }
+ for (const WeakBindingEntry &b : bindings)
+ encodeBinding(b.symbol, b.target.isec->parent,
+ b.target.isec->outSecOff + b.target.offset, b.addend,
+ /*isWeakBinding=*/true, lastBinding, os);
if (!bindings.empty() || !definitions.empty())
os << static_cast<uint8_t>(BIND_OPCODE_DONE);
}
@@ -396,23 +379,21 @@ bool macho::needsBinding(const Symbol *sym) {
}
void macho::addNonLazyBindingEntries(const Symbol *sym,
- SectionPointerUnion section,
- uint64_t offset, int64_t addend) {
+ const InputSection *isec, uint64_t offset,
+ int64_t addend) {
if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
- in.binding->addEntry(dysym, section, offset, addend);
+ in.binding->addEntry(dysym, isec, offset, addend);
if (dysym->isWeakDef())
- in.weakBinding->addEntry(sym, section, offset, addend);
+ in.weakBinding->addEntry(sym, isec, offset, addend);
} else if (auto *defined = dyn_cast<Defined>(sym)) {
- in.rebase->addEntry(section, offset);
+ in.rebase->addEntry(isec, offset);
if (defined->isExternalWeakDef())
- in.weakBinding->addEntry(sym, section, offset, addend);
- } else if (!isa<DSOHandle>(sym)) {
+ in.weakBinding->addEntry(sym, isec, offset, addend);
+ } else {
// Undefined symbols are filtered out in scanRelocations(); we should never
// get here
llvm_unreachable("cannot bind to an undefined symbol");
}
- // TODO: understand the DSOHandle case better.
- // Is it bindable? Add a new test?
}
StubsSection::StubsSection()
@@ -538,7 +519,7 @@ void LazyBindingSection::writeTo(uint8_t *buf) const {
void LazyBindingSection::addEntry(DylibSymbol *dysym) {
if (entries.insert(dysym)) {
dysym->stubsHelperIndex = entries.size() - 1;
- in.rebase->addEntry(in.lazyPointers, dysym->stubsIndex * WordSize);
+ in.rebase->addEntry(in.lazyPointers->isec, dysym->stubsIndex * WordSize);
}
}
@@ -572,9 +553,9 @@ void macho::prepareBranchTarget(Symbol *sym) {
if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
if (in.stubs->addEntry(dysym)) {
if (sym->isWeakDef()) {
- in.binding->addEntry(dysym, in.lazyPointers,
+ in.binding->addEntry(dysym, in.lazyPointers->isec,
sym->stubsIndex * WordSize);
- in.weakBinding->addEntry(sym, in.lazyPointers,
+ in.weakBinding->addEntry(sym, in.lazyPointers->isec,
sym->stubsIndex * WordSize);
} else {
in.lazyBinding->addEntry(dysym);
@@ -583,8 +564,8 @@ void macho::prepareBranchTarget(Symbol *sym) {
} else if (auto *defined = dyn_cast<Defined>(sym)) {
if (defined->isExternalWeakDef()) {
if (in.stubs->addEntry(sym)) {
- in.rebase->addEntry(in.lazyPointers, sym->stubsIndex * WordSize);
- in.weakBinding->addEntry(sym, in.lazyPointers,
+ in.rebase->addEntry(in.lazyPointers->isec, sym->stubsIndex * WordSize);
+ in.weakBinding->addEntry(sym, in.lazyPointers->isec,
sym->stubsIndex * WordSize);
}
}
@@ -786,9 +767,10 @@ void SymtabSection::finalizeContents() {
for (Symbol *sym : symtab->getSymbols()) {
if (auto *defined = dyn_cast<Defined>(sym)) {
+ if (defined->linkerInternal)
+ continue;
assert(defined->isExternal());
- (void)defined;
- addSymbol(externalSymbols, sym);
+ addSymbol(externalSymbols, defined);
} else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
if (dysym->isReferenced())
addSymbol(undefinedSymbols, sym);
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 232035f8f5f3..eb0a473ff986 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -73,6 +73,9 @@ class SyntheticSection : public OutputSection {
}
const StringRef segname;
+ // This fake InputSection makes it easier for us to write code that applies
+ // generically to both user inputs and synthetics.
+ InputSection *isec;
};
// All sections in __LINKEDIT should inherit from this.
@@ -165,16 +168,13 @@ class TlvPointerSection : public NonLazyPointerSectionBase {
section_names::threadPtrs) {}
};
-using SectionPointerUnion =
- llvm::PointerUnion<const InputSection *, const OutputSection *>;
-
struct Location {
- SectionPointerUnion section = nullptr;
- uint64_t offset = 0;
+ const InputSection *isec;
+ uint64_t offset;
- Location(SectionPointerUnion section, uint64_t offset)
- : section(section), offset(offset) {}
- uint64_t getVA() const;
+ Location(const InputSection *isec, uint64_t offset)
+ : isec(isec), offset(offset) {}
+ uint64_t getVA() const { return isec->getVA() + offset; }
};
// Stores rebase opcodes, which tell dyld where absolute addresses have been
@@ -188,9 +188,9 @@ class RebaseSection : public LinkEditSection {
bool isNeeded() const override { return !locations.empty(); }
void writeTo(uint8_t *buf) const override;
- void addEntry(SectionPointerUnion section, uint64_t offset) {
+ void addEntry(const InputSection *isec, uint64_t offset) {
if (config->isPic)
- locations.push_back({section, offset});
+ locations.push_back({isec, offset});
}
private:
@@ -215,9 +215,9 @@ class BindingSection : public LinkEditSection {
bool isNeeded() const override { return !bindings.empty(); }
void writeTo(uint8_t *buf) const override;
- void addEntry(const DylibSymbol *dysym, SectionPointerUnion section,
+ void addEntry(const DylibSymbol *dysym, const InputSection *isec,
uint64_t offset, int64_t addend = 0) {
- bindings.emplace_back(dysym, addend, Location(section, offset));
+ bindings.emplace_back(dysym, addend, Location(isec, offset));
}
private:
@@ -254,9 +254,9 @@ class WeakBindingSection : public LinkEditSection {
void writeTo(uint8_t *buf) const override;
- void addEntry(const Symbol *symbol, SectionPointerUnion section,
- uint64_t offset, int64_t addend = 0) {
- bindings.emplace_back(symbol, addend, Location(section, offset));
+ void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset,
+ int64_t addend = 0) {
+ bindings.emplace_back(symbol, addend, Location(isec, offset));
}
bool hasEntry() const { return !bindings.empty(); }
@@ -277,7 +277,7 @@ class WeakBindingSection : public LinkEditSection {
bool needsBinding(const Symbol *);
// Add bindings for symbols that need weak or non-lazy bindings.
-void addNonLazyBindingEntries(const Symbol *, SectionPointerUnion,
+void addNonLazyBindingEntries(const Symbol *, const InputSection *,
uint64_t offset, int64_t addend = 0);
// The following sections implement lazy symbol binding -- very similar to the
diff --git a/lld/test/MachO/dso-handle.s b/lld/test/MachO/dso-handle.s
index b05c513d79b3..16fc535cf8b0 100644
--- a/lld/test/MachO/dso-handle.s
+++ b/lld/test/MachO/dso-handle.s
@@ -1,19 +1,31 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
-# RUN: %lld %t.o -o %t
+# RUN: %lld -lSystem %t.o -o %t
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
# CHECK: leaq {{.*}} # 100000000
# CHECK-NEXT: leaq {{.*}} # 100000000
# RUN: %lld -dylib %t.o -o %t.dylib
-# RUN: llvm-objdump -d --no-show-raw-insn %t.dylib | FileCheck %s --check-prefix=DYLIB-CHECK
+# RUN: llvm-objdump -d --no-show-raw-insn --rebase --section-headers %t.dylib | FileCheck %s --check-prefix=DYLIB-CHECK
# DYLIB-CHECK: leaq {{.*}} # 0
# DYLIB-CHECK-NEXT: leaq {{.*}} # 0
+# DYLIB-LABEL: Sections:
+# DYLIB: __data 00000008 [[#%x,DATA:]] DATA
+# DYLIB-LABEL: Rebase table:
+# DYLIB-NEXT: segment section address type
+# DYLIB-NEXT: __DATA __data 0x{{0*}}[[#DATA]] pointer
+
+# RUN: llvm-objdump --syms %t.dylib | FileCheck %s --check-prefix=SYMS
+# SYMS-NOT: ___dso_handle
+
.globl _main
.text
_main:
leaq ___dso_handle(%rip), %rdx
movq ___dso_handle at GOTPCREL(%rip), %rdx
ret
+
+.data
+.quad ___dso_handle
diff --git a/lld/test/MachO/invalid/dso-handle-duplicate.s b/lld/test/MachO/invalid/dso-handle-duplicate.s
index 5f64dc10b0f1..ba9d04af6bfb 100644
--- a/lld/test/MachO/invalid/dso-handle-duplicate.s
+++ b/lld/test/MachO/invalid/dso-handle-duplicate.s
@@ -7,8 +7,10 @@
## far-out edge case that should be safe to ignore.
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
-# RUN: not %lld -dylib %t.o -o %t.dylib 2>&1 | FileCheck %s
-# CHECK: error: found defined symbol with illegal name ___dso_handle
+# RUN: not %lld -dylib %t.o -o /dev/null 2>&1 | FileCheck %s -DFILE=%t.o
+# CHECK: error: duplicate symbol: ___dso_handle
+# CHECK-NEXT: >>> defined in [[FILE]]
+# CHECK-NEXT: >>> defined in <internal>
.globl _main, ___dso_handle
.text
More information about the llvm-commits
mailing list