[lld] 9cc489a - [lld-macho][nfc] Factor-out NFC changes from main __eh_frame diff
Greg McGary via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 17 14:20:40 PST 2021
Author: Greg McGary
Date: 2021-11-17T15:16:44-07:00
New Revision: 9cc489a4b2b58b70f7b7d84a8544e22f39423ae6
URL: https://github.com/llvm/llvm-project/commit/9cc489a4b2b58b70f7b7d84a8544e22f39423ae6
DIFF: https://github.com/llvm/llvm-project/commit/9cc489a4b2b58b70f7b7d84a8544e22f39423ae6.diff
LOG: [lld-macho][nfc] Factor-out NFC changes from main __eh_frame diff
In order to keep signal:noise high for the `__eh_frame` diff, I have teased-out the NFC changes and put them here.
Differential Revision: https://reviews.llvm.org/D114017
Added:
Modified:
lld/MachO/Driver.cpp
lld/MachO/ICF.cpp
lld/MachO/InputFiles.cpp
lld/MachO/InputFiles.h
lld/MachO/InputSection.cpp
lld/MachO/InputSection.h
lld/MachO/MarkLive.cpp
lld/MachO/Symbols.cpp
lld/MachO/Symbols.h
lld/MachO/UnwindInfoSection.cpp
Removed:
################################################################################
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 37955c9036f52..7cce6cd3bc351 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1022,15 +1022,17 @@ static void gatherInputSections() {
int inputOrder = 0;
for (const InputFile *file : inputFiles) {
for (const Section §ion : file->sections) {
+ const Subsections &subsections = section.subsections;
+ if (subsections.empty())
+ continue;
+ if (subsections[0].isec->getName() == section_names::compactUnwind)
+ // Compact unwind entries require special handling elsewhere.
+ continue;
ConcatOutputSection *osec = nullptr;
- for (const Subsection &subsection : section.subsections) {
+ for (const Subsection &subsection : subsections) {
if (auto *isec = dyn_cast<ConcatInputSection>(subsection.isec)) {
if (isec->isCoalescedWeak())
continue;
- if (isec->getSegName() == segment_names::ld) {
- assert(isec->getName() == section_names::compactUnwind);
- continue;
- }
isec->outSecOff = inputOrder++;
if (!osec)
osec = ConcatOutputSection::getOrCreateForInput(isec);
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 6c3a565e74a98..8802f292a9c85 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -187,7 +187,7 @@ static bool equalsVariable(const ConcatInputSection *ia,
// info matches. For simplicity, we only handle the case where there are only
// symbols at offset zero within the section (which is typically the case with
// .subsections_via_symbols.)
- auto hasCU = [](Defined *d) { return d->compactUnwind; };
+ auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; };
auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU);
auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU);
if (itA == ia->symbols.end())
@@ -196,8 +196,8 @@ static bool equalsVariable(const ConcatInputSection *ia,
return false;
const Defined *da = *itA;
const Defined *db = *itB;
- if (da->compactUnwind->icfEqClass[icfPass % 2] !=
- db->compactUnwind->icfEqClass[icfPass % 2] ||
+ if (da->unwindEntry->icfEqClass[icfPass % 2] !=
+ db->unwindEntry->icfEqClass[icfPass % 2] ||
da->value != 0 || db->value != 0)
return false;
auto isZero = [](Defined *d) { return d->value == 0; };
@@ -363,8 +363,8 @@ void macho::foldIdenticalSections() {
if (isHashable) {
hashable.push_back(isec);
for (Defined *d : isec->symbols)
- if (d->compactUnwind)
- hashable.push_back(d->compactUnwind);
+ if (d->unwindEntry)
+ hashable.push_back(d->unwindEntry);
} else {
isec->icfEqClass[0] = ++icfUniqueID;
}
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 7bc432296024d..558de4131cb9f 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -277,17 +277,16 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
if (sec.align >= 32) {
error("alignment " + std::to_string(sec.align) + " of section " + name +
" is too large");
- sections.push_back({});
+ sections.push_back(sec.addr);
continue;
}
uint32_t align = 1 << sec.align;
uint32_t flags = sec.flags;
auto splitRecords = [&](int recordSize) -> void {
- sections.push_back({});
+ sections.push_back(sec.addr);
if (data.empty())
return;
-
Subsections &subsections = sections.back().subsections;
subsections.reserve(data.size() / recordSize);
auto *isec = make<ConcatInputSection>(
@@ -319,10 +318,12 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
isec = make<WordLiteralInputSection>(segname, name, this, data, align,
flags);
}
- sections.push_back({});
+ sections.push_back(sec.addr);
sections.back().subsections.push_back({0, isec});
} else if (auto recordSize = getRecordSize(segname, name)) {
splitRecords(*recordSize);
+ if (name == section_names::compactUnwind)
+ compactUnwindSection = §ions.back();
} else if (segname == segment_names::llvm) {
// ld64 does not appear to emit contents from sections within the __LLVM
// segment. Symbols within those sections point to bitcode metadata
@@ -330,7 +331,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
// have the same name without causing duplicate symbol errors. Push an
// empty entry to ensure indices line up for the remaining sections.
// TODO: Evaluate whether the bitcode metadata is needed.
- sections.push_back({});
+ sections.push_back(sec.addr);
} else {
auto *isec =
make<ConcatInputSection>(segname, name, this, data, align, flags);
@@ -340,10 +341,10 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
// object files that contain them. We filter them out early to avoid
// parsing their relocations unnecessarily. But we must still push an
// empty entry to ensure the indices line up for the remaining sections.
- sections.push_back({});
+ sections.push_back(sec.addr);
debugSections.push_back(isec);
} else {
- sections.push_back({});
+ sections.push_back(sec.addr);
sections.back().subsections.push_back({0, isec});
}
}
@@ -358,7 +359,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
// same location as an offset relative to the start of the containing
// subsection.
template <class T>
-static InputSection *findContainingSubsection(Subsections &subsections,
+static InputSection *findContainingSubsection(const Subsections &subsections,
T *offset) {
static_assert(std::is_same<uint64_t, T>::value ||
std::is_same<uint32_t, T>::value,
@@ -437,8 +438,17 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
// and insert them. Storing addends in the instruction stream is
// possible, but inconvenient and more costly at link time.
- int64_t pairedAddend = 0;
relocation_info relInfo = relInfos[i];
+ bool isSubtrahend =
+ target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
+ if (isSubtrahend && StringRef(sec.sectname) == section_names::ehFrame) {
+ // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
+ // adds local "EH_Frame1" and "func.eh". Ignore them because they have
+ // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
+ ++i;
+ continue;
+ }
+ int64_t pairedAddend = 0;
if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
relInfo = relInfos[++i];
@@ -449,8 +459,6 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
if (relInfo.r_address & R_SCATTERED)
fatal("TODO: Scattered relocations not supported");
- bool isSubtrahend =
- target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo);
assert(!(embeddedAddend && pairedAddend));
int64_t totalAddend = pairedAddend + embeddedAddend;
@@ -691,12 +699,17 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
Subsections &subsections = sections[i].subsections;
if (subsections.empty())
continue;
-
+ InputSection *lastIsec = subsections.back().isec;
+ if (lastIsec->getName() == section_names::ehFrame) {
+ // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
+ // adds local "EH_Frame1" and "func.eh". Ignore them because they have
+ // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
+ continue;
+ }
std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
uint64_t sectionAddr = sectionHeaders[i].addr;
uint32_t sectionAlign = 1u << sectionHeaders[i].align;
- InputSection *lastIsec = subsections.back().isec;
// Record-based sections have already been split into subsections during
// parseSections(), so we simply need to match Symbols to the corresponding
// subsection here.
@@ -725,11 +738,11 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
return nList[lhs].n_value < nList[rhs].n_value;
});
- Subsection subsec = subsections.back();
for (size_t j = 0; j < symbolIndices.size(); ++j) {
uint32_t symIndex = symbolIndices[j];
const NList &sym = nList[symIndex];
StringRef name = strtab + sym.n_strx;
+ Subsection &subsec = subsections.back();
InputSection *isec = subsec.isec;
uint64_t subsecAddr = sectionAddr + subsec.offset;
@@ -773,7 +786,6 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
// emulating that behavior.
nextIsec->align = MinAlign(sectionAlign, sym.n_value);
subsections.push_back({sym.n_value - sectionAddr, nextIsec});
- subsec = subsections.back();
}
}
@@ -799,7 +811,7 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
make<ConcatInputSection>(segName.take_front(16), sectName.take_front(16),
/*file=*/this, data);
isec->live = true;
- sections.push_back({});
+ sections.push_back(0);
sections.back().subsections.push_back({0, isec});
}
@@ -869,7 +881,8 @@ template <class LP> void ObjFile::parse() {
parseDebugInfo();
if (config->emitDataInCodeInfo)
parseDataInCode();
- registerCompactUnwind();
+ if (compactUnwindSection)
+ registerCompactUnwind();
}
void ObjFile::parseDebugInfo() {
@@ -912,20 +925,7 @@ void ObjFile::parseDataInCode() {
// Create pointers from symbols to their associated compact unwind entries.
void ObjFile::registerCompactUnwind() {
- // First, locate the __compact_unwind section.
- Section *cuSection = nullptr;
- for (Section §ion : sections) {
- if (section.subsections.empty())
- continue;
- if (section.subsections[0].isec->getSegName() != segment_names::ld)
- continue;
- cuSection = §ion;
- break;
- }
- if (!cuSection)
- return;
-
- for (Subsection &subsection : cuSection->subsections) {
+ for (const Subsection &subsection : compactUnwindSection->subsections) {
ConcatInputSection *isec = cast<ConcatInputSection>(subsection.isec);
// Hack!! Since each CUE contains a
diff erent function address, if ICF
// operated naively and compared the entire contents of each CUE, entries
@@ -939,7 +939,7 @@ void ObjFile::registerCompactUnwind() {
ConcatInputSection *referentIsec;
for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
Reloc &r = *it;
- // We only wish to handle the relocation for CUE::functionAddress.
+ // CUE::functionAddress is at offset 0. Skip personality & LSDA relocs.
if (r.offset != 0) {
++it;
continue;
@@ -974,7 +974,7 @@ void ObjFile::registerCompactUnwind() {
++it;
continue;
}
- (*symIt)->compactUnwind = isec;
+ (*symIt)->unwindEntry = isec;
// Since we've sliced away the functionAddress, we should remove the
// corresponding relocation too. Given that clang emits relocations in
// reverse order of address, this relocation should be at the end of the
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 2c870b78316fd..93794cb5a4aae 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -41,6 +41,7 @@ namespace macho {
struct PlatformInfo;
class ConcatInputSection;
class Symbol;
+class Defined;
struct Reloc;
enum class RefState : uint8_t;
@@ -59,8 +60,9 @@ struct Subsection {
using Subsections = std::vector<Subsection>;
struct Section {
- // uint64_t address = 0; // TODO(gkm): this will debut with __eh_frame handler
+ uint64_t address = 0;
Subsections subsections;
+ Section(uint64_t addr) : address(addr){};
};
class InputFile {
@@ -114,6 +116,8 @@ class ObjFile final : public InputFile {
ArrayRef<llvm::MachO::data_in_code_entry> dataInCodeEntries;
private:
+ Section *compactUnwindSection = nullptr;
+
template <class LP> void parse();
template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
template <class LP>
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 0dbe6a619e985..96167b72a724f 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -120,10 +120,11 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
it = symbols.begin();
uint64_t v = (*it)->value;
for (++it; it != symbols.end(); ++it) {
- if ((*it)->value == v)
- (*it)->compactUnwind = nullptr;
+ Defined *d = *it;
+ if (d->value == v)
+ d->unwindEntry = nullptr;
else
- v = (*it)->value;
+ v = d->value;
}
}
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index e8aefc16f6adb..1183e32fbabf9 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -298,6 +298,7 @@ constexpr const char debugAbbrev[] = "__debug_abbrev";
constexpr const char debugInfo[] = "__debug_info";
constexpr const char debugStr[] = "__debug_str";
constexpr const char ehFrame[] = "__eh_frame";
+constexpr const char gccExceptTab[] = "__gcc_except_tab";
constexpr const char export_[] = "__export";
constexpr const char dataInCode[] = "__data_in_code";
constexpr const char functionStarts[] = "__func_starts";
diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp
index e4aba8fb90d42..4269c8342c656 100644
--- a/lld/MachO/MarkLive.cpp
+++ b/lld/MachO/MarkLive.cpp
@@ -51,8 +51,8 @@ void markLive() {
if (auto *d = dyn_cast<Defined>(s)) {
if (d->isec)
enqueue(d->isec, d->value);
- if (d->compactUnwind)
- enqueue(d->compactUnwind, 0);
+ if (d->unwindEntry)
+ enqueue(d->unwindEntry, 0);
}
};
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 35b6d040038f0..bb6d073dcf308 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -95,8 +95,8 @@ uint64_t Defined::getVA() const {
}
void Defined::canonicalize() {
- if (compactUnwind)
- compactUnwind = compactUnwind->canonical();
+ if (unwindEntry)
+ unwindEntry = unwindEntry->canonical();
if (isec)
isec = isec->canonical();
}
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 2b44711861bb5..cc6f51cc5fd3a 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -168,7 +168,7 @@ class Defined : public Symbol {
uint64_t value;
// size is only calculated for regular (non-bitcode) symbols.
uint64_t size;
- ConcatInputSection *compactUnwind = nullptr;
+ ConcatInputSection *unwindEntry = nullptr;
};
// This enum does double-duty: as a symbol property, it indicates whether & how
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 149bb10cb9086..690098c7a3b78 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -154,8 +154,8 @@ void UnwindInfoSection::prepareRelocations() {
// entries to the GOT. Hence the use of a MapVector for
// UnwindInfoSection::symbols.
for (const Defined *d : make_second_range(symbols))
- if (d->compactUnwind)
- prepareRelocations(d->compactUnwind);
+ if (d->unwindEntry)
+ prepareRelocations(d->unwindEntry);
}
// Record function symbols that may need entries emitted in __unwind_info, which
@@ -169,7 +169,7 @@ void UnwindInfoSection::prepareRelocations() {
// symbols for each unique address regardless of whether they have associated
// unwind info.
void UnwindInfoSection::addSymbol(const Defined *d) {
- if (d->compactUnwind)
+ if (d->unwindEntry)
allEntriesAreOmitted = false;
// We don't yet know the final output address of this symbol, but we know that
// they are uniquely determined by a combination of the isec and value, so
@@ -177,8 +177,8 @@ void UnwindInfoSection::addSymbol(const Defined *d) {
auto p = symbols.insert({{d->isec, d->value}, d});
// If we have multiple symbols at the same address, only one of them can have
// an associated CUE.
- if (!p.second && d->compactUnwind) {
- assert(!p.first->second->compactUnwind);
+ if (!p.second && d->unwindEntry) {
+ assert(!p.first->second->unwindEntry);
p.first->second = d;
}
}
@@ -202,11 +202,19 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
Reloc &r = isec->relocs[i];
assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
+ // Functions and LSDA entries always reside in the same object file as the
+ // compact unwind entries that references them, and thus appear as section
+ // relocs. There is no need to prepare them. We only prepare relocs for
+ // personality functions.
if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
offsetof(CompactUnwindEntry<Ptr>, personality))
continue;
if (auto *s = r.referent.dyn_cast<Symbol *>()) {
+ // Personality functions are nearly always system-defined (e.g.,
+ // ___gxx_personality_v0 for C++) and relocated as dylib symbols. When an
+ // application provides its own personality function, it might be
+ // referenced by an extern Defined symbol reloc, or a local section reloc.
if (auto *defined = dyn_cast<Defined>(s)) {
// XXX(vyng) This is a a special case for handling duplicate personality
// symbols. Note that LD64's behavior is a bit
diff erent and it is
@@ -291,13 +299,13 @@ void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
const Defined *d = symbolsVec[i].second;
// Write the functionAddress.
writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2);
- if (!d->compactUnwind)
+ if (!d->unwindEntry)
return;
// Write the rest of the CUE.
- memcpy(buf + sizeof(Ptr), d->compactUnwind->data.data(),
- d->compactUnwind->data.size());
- for (const Reloc &r : d->compactUnwind->relocs) {
+ memcpy(buf + sizeof(Ptr), d->unwindEntry->data.data(),
+ d->unwindEntry->data.size());
+ for (const Reloc &r : d->unwindEntry->relocs) {
uint64_t referentVA = 0;
if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
if (!isa<Undefined>(referentSym)) {
@@ -432,9 +440,8 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
// So we check their relocations instead.
// FIXME: should we account for an LSDA at an absolute address? ld64 seems
// to support it, but it seems unlikely to be used in practice.
- Reloc *lsda1 =
- findLsdaReloc(symbolsVec[*foldBegin].second->compactUnwind);
- Reloc *lsda2 = findLsdaReloc(symbolsVec[*foldEnd].second->compactUnwind);
+ Reloc *lsda1 = findLsdaReloc(symbolsVec[*foldBegin].second->unwindEntry);
+ Reloc *lsda2 = findLsdaReloc(symbolsVec[*foldEnd].second->unwindEntry);
if (lsda1 == nullptr && lsda2 == nullptr)
continue;
if (lsda1 == nullptr || lsda2 == nullptr)
@@ -536,7 +543,8 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
for (size_t idx : cuIndices) {
lsdaIndex[idx] = entriesWithLsda.size();
- if (findLsdaReloc(symbolsVec[idx].second->compactUnwind))
+ const Defined *d = symbolsVec[idx].second;
+ if (findLsdaReloc(d->unwindEntry))
entriesWithLsda.push_back(idx);
}
@@ -614,7 +622,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
for (size_t idx : entriesWithLsda) {
const CompactUnwindEntry<Ptr> &cu = cuEntries[idx];
const Defined *d = symbolsVec[idx].second;
- if (Reloc *r = findLsdaReloc(d->compactUnwind)) {
+ if (Reloc *r = findLsdaReloc(d->unwindEntry)) {
uint64_t va;
if (auto *isec = r->referent.dyn_cast<InputSection *>()) {
va = isec->getVA(r->addend);
More information about the llvm-commits
mailing list