[llvm] 389e0a8 - [lld-macho] Support synthesizing __TEXT,__init_offsets
Daniel Bertalan via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 31 01:22:07 PDT 2022
Author: Daniel Bertalan
Date: 2022-08-31T10:13:45+02:00
New Revision: 389e0a81a15ca688cf85a82d04aeaa68d18da161
URL: https://github.com/llvm/llvm-project/commit/389e0a81a15ca688cf85a82d04aeaa68d18da161
DIFF: https://github.com/llvm/llvm-project/commit/389e0a81a15ca688cf85a82d04aeaa68d18da161.diff
LOG: [lld-macho] Support synthesizing __TEXT,__init_offsets
This section stores 32-bit `__TEXT` segment offsets of initializer
functions, and is used instead of `__mod_init_func` when chained fixups
are enabled.
Storing the offsets lets us avoid emitting fixups for the initializers.
Differential Revision: https://reviews.llvm.org/D132947
Added:
lld/test/MachO/init-offsets.s
Modified:
lld/MachO/Config.h
lld/MachO/Driver.cpp
lld/MachO/InputSection.h
lld/MachO/MarkLive.cpp
lld/MachO/Options.td
lld/MachO/OutputSegment.cpp
lld/MachO/Symbols.h
lld/MachO/SyntheticSections.cpp
lld/MachO/SyntheticSections.h
lld/MachO/Writer.cpp
llvm/include/llvm/BinaryFormat/MachO.h
llvm/lib/MC/MCSectionMachO.cpp
llvm/tools/llvm-objdump/MachODump.cpp
Removed:
################################################################################
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 8f2d790603c63..c6e8b2582bd7d 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -131,6 +131,7 @@ struct Configuration {
bool emitBitcodeBundle = false;
bool emitDataInCodeInfo = false;
bool emitEncryptionInfo = false;
+ bool emitInitOffsets = false;
bool timeTraceEnabled = false;
bool dataConst = false;
bool dedupLiterals = true;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 7d9b06411818b..d22c3ea718c48 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1103,6 +1103,11 @@ static void gatherInputSections() {
if (auto *isec = dyn_cast<ConcatInputSection>(subsection.isec)) {
if (isec->isCoalescedWeak())
continue;
+ if (config->emitInitOffsets &&
+ sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
+ in.initOffsets->addInput(isec);
+ continue;
+ }
isec->outSecOff = inputOrder++;
if (!osec)
osec = ConcatOutputSection::getOrCreateForInput(isec);
@@ -1432,6 +1437,7 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->emitBitcodeBundle = args.hasArg(OPT_bitcode_bundle);
config->emitDataInCodeInfo =
args.hasFlag(OPT_data_in_code_info, OPT_no_data_in_code_info, true);
+ config->emitInitOffsets = args.hasArg(OPT_init_offsets);
config->icfLevel = getICFLevel(args);
config->dedupLiterals =
args.hasFlag(OPT_deduplicate_literals, OPT_icf_eq, false) ||
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 0f79bdfd22648..8946724e2d984 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -314,6 +314,7 @@ constexpr const char functionStarts[] = "__func_starts";
constexpr const char got[] = "__got";
constexpr const char header[] = "__mach_header";
constexpr const char indirectSymbolTable[] = "__ind_sym_tab";
+constexpr const char initOffsets[] = "__init_offsets";
constexpr const char const_[] = "__const";
constexpr const char lazySymbolPtr[] = "__la_symbol_ptr";
constexpr const char lazyBinding[] = "__lazy_binding";
diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp
index cc9afe181d0f3..ba7d215d9f0a4 100644
--- a/lld/MachO/MarkLive.cpp
+++ b/lld/MachO/MarkLive.cpp
@@ -279,11 +279,16 @@ void markLive() {
// mod_init_funcs, mod_term_funcs sections
if (sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS ||
sectionType(isec->getFlags()) == S_MOD_TERM_FUNC_POINTERS) {
+ assert(!config->emitInitOffsets ||
+ sectionType(isec->getFlags()) != S_MOD_INIT_FUNC_POINTERS);
marker->enqueue(isec, 0);
continue;
}
}
+ for (ConcatInputSection *isec : in.initOffsets->inputs())
+ marker->enqueue(isec, 0);
+
marker->markTransitively();
}
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 2d2c58e3ada01..6af993a430cb6 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1273,8 +1273,7 @@ def ignore_optimization_hints : Flag<["-"], "ignore_optimization_hints">,
HelpText<"Ignore Linker Optimization Hints">,
Group<grp_undocumented>;
def init_offsets : Flag<["-"], "init_offsets">,
- HelpText<"This option is undocumented in ld64">,
- Flags<[HelpHidden]>,
+ HelpText<"Store __TEXT segment offsets of static initializers">,
Group<grp_undocumented>;
def keep_dwarf_unwind : Flag<["-"], "keep_dwarf_unwind">,
HelpText<"This option is undocumented in ld64">,
diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
index da1394c088314..91770f58b805a 100644
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -84,10 +84,11 @@ static int sectionOrder(OutputSection *osec) {
// Sections are uniquely identified by their segment + section name.
if (segname == segment_names::text) {
return StringSwitch<int>(osec->name)
- .Case(section_names::header, -4)
- .Case(section_names::text, -3)
- .Case(section_names::stubs, -2)
- .Case(section_names::stubHelper, -1)
+ .Case(section_names::header, -5)
+ .Case(section_names::text, -4)
+ .Case(section_names::stubs, -3)
+ .Case(section_names::stubHelper, -2)
+ .Case(section_names::initOffsets, -1)
.Case(section_names::unwindInfo, std::numeric_limits<int>::max() - 1)
.Case(section_names::ehFrame, std::numeric_limits<int>::max())
.Default(osec->inputOrder);
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index c661913becf48..9d3b56a7ae269 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -346,6 +346,14 @@ T *replaceSymbol(Symbol *s, ArgT &&...arg) {
return sym;
}
+// Can a symbol's address only be resolved at runtime?
+inline bool needsBinding(const Symbol *sym) {
+ if (isa<DylibSymbol>(sym))
+ return true;
+ if (const auto *defined = dyn_cast<Defined>(sym))
+ return defined->isExternalWeakDef() || defined->interposable;
+ return false;
+}
} // namespace macho
std::string toString(const macho::Symbol &);
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 1737484c83a93..9373e0b24fe0a 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1816,6 +1816,74 @@ void ObjCImageInfoSection::writeTo(uint8_t *buf) const {
write32le(buf + 4, flags);
}
+InitOffsetsSection::InitOffsetsSection()
+ : SyntheticSection(segment_names::text, section_names::initOffsets) {
+ flags = S_INIT_FUNC_OFFSETS;
+}
+
+uint64_t InitOffsetsSection::getSize() const {
+ size_t count = 0;
+ for (const ConcatInputSection *isec : sections)
+ count += isec->relocs.size();
+ return count * sizeof(uint32_t);
+}
+
+void InitOffsetsSection::writeTo(uint8_t *buf) const {
+ uint64_t textVA = 0;
+ for (const OutputSegment *oseg : outputSegments)
+ if (oseg->name == segment_names::text) {
+ textVA = oseg->addr;
+ break;
+ }
+
+ // FIXME: Add function specified by -init when that argument is implemented.
+ for (ConcatInputSection *isec : sections) {
+ for (const Reloc &rel : isec->relocs) {
+ const Symbol *referent = rel.referent.dyn_cast<Symbol *>();
+ assert(referent && "section relocation should have been rejected");
+ uint64_t offset = referent->getVA() - textVA;
+ // FIXME: Can we handle this gracefully?
+ if (offset > UINT32_MAX)
+ fatal(isec->getLocation(rel.offset) + ": offset to initializer " +
+ referent->getName() + " (" + utohexstr(offset) +
+ ") does not fit in 32 bits");
+
+ // Entries need to be added in the order they appear in the section, but
+ // relocations aren't guaranteed to be sorted.
+ size_t index = rel.offset >> target->p2WordSize;
+ write32le(&buf[index * sizeof(uint32_t)], offset);
+ }
+ buf += isec->relocs.size() * sizeof(uint32_t);
+ }
+}
+
+// The inputs are __mod_init_func sections, which contain pointers to
+// initializer functions, therefore all relocations should be of the UNSIGNED
+// type. InitOffsetsSection stores offsets, so if the initializer's address is
+// not known at link time, stub-indirection has to be used.
+void InitOffsetsSection::setUp() {
+ for (const ConcatInputSection *isec : sections) {
+ for (const Reloc &rel : isec->relocs) {
+ RelocAttrs attrs = target->getRelocAttrs(rel.type);
+ if (!attrs.hasAttr(RelocAttrBits::UNSIGNED))
+ error(isec->getLocation(rel.offset) +
+ ": unsupported relocation type: " + attrs.name);
+ if (rel.addend != 0)
+ error(isec->getLocation(rel.offset) +
+ ": relocation addend is not representable in __init_offsets");
+ if (rel.referent.is<InputSection *>())
+ error(isec->getLocation(rel.offset) +
+ ": unexpected section relocation");
+
+ Symbol *sym = rel.referent.dyn_cast<Symbol *>();
+ if (auto *undefined = dyn_cast<Undefined>(sym))
+ treatUndefinedSymbol(*undefined, isec, rel.offset);
+ if (needsBinding(sym))
+ in.stubs->addEntry(sym);
+ }
+ }
+}
+
void macho::createSyntheticSymbols() {
auto addHeaderSymbol = [](const char *name) {
symtab->addSynthetic(name, in.header->isec, /*value=*/0,
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 9b158dbd515d1..ac9bde62e6696 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -647,6 +647,32 @@ class ObjCImageInfoSection final : public SyntheticSection {
std::vector<const InputFile *> files; // files with image info
};
+// This section stores 32-bit __TEXT segment offsets of initializer functions.
+//
+// The compiler stores pointers to initializers in __mod_init_func. These need
+// to be fixed up at load time, which takes time and dirties memory. By
+// synthesizing InitOffsetsSection from them, this data can live in the
+// read-only __TEXT segment instead. This section is used by default when
+// chained fixups are enabled.
+//
+// There is no similar counterpart to __mod_term_func, as that section is
+// deprecated, and static destructors are instead handled by registering them
+// via __cxa_atexit from an autogenerated initializer function (see D121736).
+class InitOffsetsSection final : public SyntheticSection {
+public:
+ InitOffsetsSection();
+ bool isNeeded() const override { return !sections.empty(); }
+ uint64_t getSize() const override;
+ void writeTo(uint8_t *buf) const override;
+ void setUp();
+
+ void addInput(ConcatInputSection *isec) { sections.push_back(isec); }
+ const std::vector<ConcatInputSection *> &inputs() const { return sections; }
+
+private:
+ std::vector<ConcatInputSection *> sections;
+};
+
struct InStruct {
const uint8_t *bufferStart = nullptr;
MachHeaderSection *header = nullptr;
@@ -668,6 +694,7 @@ struct InStruct {
UnwindInfoSection *unwindInfo = nullptr;
ObjCImageInfoSection *objCImageInfo = nullptr;
ConcatInputSection *imageLoaderCache = nullptr;
+ InitOffsetsSection *initOffsets = nullptr;
};
extern InStruct in;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index e8bcf8cd96445..01054fe773b1f 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -575,15 +575,6 @@ void Writer::treatSpecialUndefineds() {
}
}
-// Can a symbol's address can only be resolved at runtime?
-static bool needsBinding(const Symbol *sym) {
- if (isa<DylibSymbol>(sym))
- return true;
- if (const auto *defined = dyn_cast<Defined>(sym))
- return defined->isExternalWeakDef() || defined->interposable;
- return false;
-}
-
static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
const lld::macho::Reloc &r) {
assert(sym->isLive());
@@ -1141,6 +1132,8 @@ template <class LP> void Writer::run() {
if (in.objcStubs->isNeeded())
in.objcStubs->setUp();
scanRelocations();
+ if (in.initOffsets->isNeeded())
+ in.initOffsets->setUp();
// Do not proceed if there was an undefined symbol.
reportPendingUndefinedSymbols();
@@ -1204,6 +1197,7 @@ void macho::createSyntheticSections() {
in.objcStubs = make<ObjCStubsSection>();
in.unwindInfo = makeUnwindInfoSection();
in.objCImageInfo = make<ObjCImageInfoSection>();
+ in.initOffsets = make<InitOffsetsSection>();
// This section contains space for just a single word, and will be used by
// dyld to cache an address to the image loader it uses.
diff --git a/lld/test/MachO/init-offsets.s b/lld/test/MachO/init-offsets.s
new file mode 100644
index 0000000000000..9a27033adfdfe
--- /dev/null
+++ b/lld/test/MachO/init-offsets.s
@@ -0,0 +1,73 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/first.s -o %t/first.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/second.s -o %t/second.o
+
+# RUN: %lld -lSystem -init_offsets -undefined dynamic_lookup %t/first.o %t/second.o -o %t/out
+# RUN: llvm-otool -lv %t/out | FileCheck --check-prefix=FLAGS --implicit-check-not=__mod_init_func %s
+# RUN: llvm-otool -l %t/out > %t/dump.txt
+# RUN: llvm-objdump --macho --print-imm-hex --section=__TEXT,__stubs %t/out >> %t/dump.txt
+# RUN: llvm-objdump --macho --syms %t/out >> %t/dump.txt
+# RUN: llvm-objcopy --dump-section=__TEXT,__init_offsets=%t/section.bin %t/out
+# RUN: echo "__TEXT,__init_offsets contents:" >> %t/dump.txt
+# RUN: od -An -txI %t/section.bin >> %t/dump.txt
+# RUN: FileCheck --check-prefix=CONTENT %s < %t/dump.txt
+
+## This test checks that:
+## - __mod_init_func is replaced by __init_offsets.
+## - __init_offsets has type S_INIT_FUNC_OFFSETS.
+## - initializers show up in the order their parent objects are specified on the
+## command line, and in the order they show up within __mod_init_func.
+## - for undefined and dylib symbols, stubs are created, and the offsets point to those.
+## - offsets are relative to __TEXT's address, they aren't an absolute virtual address.
+
+# FLAGS: sectname __init_offsets
+# FLAGS-NEXT: segname __TEXT
+# FLAGS-NEXT: addr
+# FLAGS-NEXT: size 0x0000000000000010
+# FLAGS-NEXT: offset
+# FLAGS-NEXT: align
+# FLAGS-NEXT: reloff 0
+# FLAGS-NEXT: nreloc 0
+# FLAGS-NEXT: type S_INIT_FUNC_OFFSETS
+
+# CONTENT: segname __TEXT
+# CONTENT-NEXT: 0x[[#%x, TEXT:]]
+
+# CONTENT: Contents of (__TEXT,__stubs) section
+# CONTENT-NEXT: [[#%x, ISNAN:]]: {{.*}} ## literal pool symbol address: ___isnan
+# CONTENT-NEXT: [[#%x, UNDEF:]]: {{.*}} ## literal pool symbol address: _undefined
+
+# CONTENT: SYMBOL TABLE:
+# CONTENT: [[#%x, FIRST:]] g F __TEXT,__text _first_init
+# CONTENT: [[#%x, SECOND:]] g F __TEXT,__text _second_init
+
+# CONTENT: __TEXT,__init_offsets contents:
+# CONTENT: [[#%.8x, FIRST - TEXT]] [[#%.8x, ISNAN - TEXT]] [[#%.8x, UNDEF - TEXT]] [[#%.8x, SECOND - TEXT]]
+
+#--- first.s
+.globl _first_init, ___isnan, _main
+.text
+_first_init:
+ ret
+_main:
+ ret
+
+.section __DATA,__mod_init_func,mod_init_funcs
+.quad _first_init
+.quad ___isnan
+
+.subsections_via_symbols
+
+#--- second.s
+.globl _second_init, _undefined
+.text
+_second_init:
+ ret
+
+.section __DATA,__mod_init_func,mod_init_funcs
+.quad _undefined
+.quad _second_init
+
+.subsections_via_symbols
diff --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h
index 81b315a59ea9e..8627ed68bf09c 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/llvm/include/llvm/BinaryFormat/MachO.h
@@ -175,8 +175,11 @@ enum SectionType : uint32_t {
/// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local
/// variable initialization pointers to functions.
S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15u,
+ /// S_INIT_FUNC_OFFSETS - Section with 32-bit offsets to initializer
+ /// functions.
+ S_INIT_FUNC_OFFSETS = 0x16u,
- LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS
+ LAST_KNOWN_SECTION_TYPE = S_INIT_FUNC_OFFSETS
};
enum : uint32_t {
diff --git a/llvm/lib/MC/MCSectionMachO.cpp b/llvm/lib/MC/MCSectionMachO.cpp
index 1c210fb0f4c87..f7eedac3f2d1a 100644
--- a/llvm/lib/MC/MCSectionMachO.cpp
+++ b/llvm/lib/MC/MCSectionMachO.cpp
@@ -62,6 +62,8 @@ static constexpr struct {
StringLiteral("S_THREAD_LOCAL_VARIABLE_POINTERS")}, // 0x14
{StringLiteral("thread_local_init_function_pointers"),
StringLiteral("S_THREAD_LOCAL_INIT_FUNCTION_POINTERS")}, // 0x15
+ {StringLiteral("") /* linker-synthesized */,
+ StringLiteral("S_INIT_FUNC_OFFSETS")}, // 0x16
};
/// SectionAttrDescriptors - This is an array of descriptors for section
diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index 1146d36f5da29..f615fbf4d81c5 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -8955,6 +8955,8 @@ static void PrintSection(const char *sectname, const char *segname,
outs() << " S_THREAD_LOCAL_VARIABLE_POINTERS\n";
else if (section_type == MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS)
outs() << " S_THREAD_LOCAL_INIT_FUNCTION_POINTERS\n";
+ else if (section_type == MachO::S_INIT_FUNC_OFFSETS)
+ outs() << " S_INIT_FUNC_OFFSETS\n";
else
outs() << format("0x%08" PRIx32, section_type) << "\n";
outs() << "attributes";
More information about the llvm-commits mailing list