[lld] [lld-macho][NFC] Preserve original symbol isec, unwindEntry and size (PR #88357)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 11 06:56:58 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld-macho
Author: None (alx32)
<details>
<summary>Changes</summary>
Currently, when moving symbols from one `InputSection` to another (like in ICF), we directly update the symbol's `isec`, `unwindEntry` and `size`. By doing this we lose the original information, which will be needed in a future change. Since moving symbols always sets the symbol's `wasCoalesced` and `isec->replacement`, we can use this info to conditionally get the information we need at access time.
---
Patch is 38.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88357.diff
14 Files Affected:
- (modified) lld/MachO/ICF.cpp (+16-16)
- (modified) lld/MachO/InputFiles.cpp (+10-9)
- (modified) lld/MachO/InputSection.cpp (+2-4)
- (modified) lld/MachO/MapFile.cpp (+12-5)
- (modified) lld/MachO/MarkLive.cpp (+5-5)
- (modified) lld/MachO/ObjC.cpp (+18-18)
- (modified) lld/MachO/Relocations.cpp (+1-1)
- (modified) lld/MachO/SectionPriorities.cpp (+4-4)
- (modified) lld/MachO/SymbolTable.cpp (+6-6)
- (modified) lld/MachO/Symbols.cpp (+19-15)
- (modified) lld/MachO/Symbols.h (+11-6)
- (modified) lld/MachO/SyntheticSections.cpp (+8-8)
- (modified) lld/MachO/UnwindInfoSection.cpp (+20-19)
- (modified) lld/MachO/Writer.cpp (+2-4)
``````````diff
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 0278bf7c6751a2..fc786b571dc64f 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -133,13 +133,13 @@ bool ICF::equalsConstant(const ConcatInputSection *ia,
assert(isa<Defined>(sa));
const auto *da = cast<Defined>(sa);
const auto *db = cast<Defined>(sb);
- if (!da->isec || !db->isec) {
+ if (!da->isec() || !db->isec()) {
assert(da->isAbsolute() && db->isAbsolute());
return da->value + ra.addend == db->value + rb.addend;
}
- isecA = da->isec;
+ isecA = da->isec();
valueA = da->value;
- isecB = db->isec;
+ isecB = db->isec();
valueB = db->value;
} else {
isecA = ra.referent.get<InputSection *>();
@@ -191,10 +191,10 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
if (da->isAbsolute())
return true;
- isecA = dyn_cast<ConcatInputSection>(da->isec);
+ isecA = dyn_cast<ConcatInputSection>(da->isec());
if (!isecA)
return true; // literal sections were checked in equalsConstant.
- isecB = cast<ConcatInputSection>(db->isec);
+ isecB = cast<ConcatInputSection>(db->isec());
} else {
const auto *sa = ra.referent.get<InputSection *>();
const auto *sb = rb.referent.get<InputSection *>();
@@ -212,7 +212,7 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
// info matches. For simplicity, we only handle the case where there are only
// symbols at offset zero within the section (which is typically the case with
// .subsections_via_symbols.)
- auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; };
+ auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; };
const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
if (itA == ia->symbols.end())
@@ -221,8 +221,8 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
return false;
const Defined *da = *itA;
const Defined *db = *itB;
- if (da->unwindEntry->icfEqClass[icfPass % 2] !=
- db->unwindEntry->icfEqClass[icfPass % 2] ||
+ if (da->unwindEntry()->icfEqClass[icfPass % 2] !=
+ db->unwindEntry()->icfEqClass[icfPass % 2] ||
da->value != 0 || db->value != 0)
return false;
auto isZero = [](Defined *d) { return d->value == 0; };
@@ -289,13 +289,13 @@ void ICF::run() {
for (const Reloc &r : isec->relocs) {
if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
if (auto *defined = dyn_cast<Defined>(sym)) {
- if (defined->isec) {
+ if (defined->isec()) {
if (auto *referentIsec =
- dyn_cast<ConcatInputSection>(defined->isec))
+ dyn_cast<ConcatInputSection>(defined->isec()))
hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
else
- hash += defined->isec->kind() +
- defined->isec->getOffset(defined->value);
+ hash += defined->isec()->kind() +
+ defined->isec()->getOffset(defined->value);
} else {
hash += defined->value;
}
@@ -368,8 +368,8 @@ void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
void macho::markSymAsAddrSig(Symbol *s) {
if (auto *d = dyn_cast_or_null<Defined>(s))
- if (d->isec)
- d->isec->keepUnique = true;
+ if (d->isec())
+ d->isec()->keepUnique = true;
}
void macho::markAddrSigSymbols() {
@@ -430,8 +430,8 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
if (isFoldable) {
foldable.push_back(isec);
for (Defined *d : isec->symbols)
- if (d->unwindEntry)
- foldable.push_back(d->unwindEntry);
+ if (d->unwindEntry())
+ foldable.push_back(d->unwindEntry());
// Some sections have embedded addends that foil ICF's hashing / equality
// checks. (We can ignore embedded addends when doing ICF because the same
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index b36d390cc16ade..8d66b37534f47e 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -1170,7 +1170,7 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
continue;
}
add += sym->value;
- referentIsec = cast<ConcatInputSection>(sym->isec);
+ referentIsec = cast<ConcatInputSection>(sym->isec());
} else {
referentIsec =
cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
@@ -1191,7 +1191,7 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
++it;
continue;
}
- d->unwindEntry = isec;
+ d->originalUnwindEntry = isec;
// Now that the symbol points to the unwind entry, we can remove the reloc
// that points from the unwind entry back to the symbol.
//
@@ -1348,7 +1348,7 @@ targetSymFromCanonicalSubtractor(const InputSection *isec,
}
if (Invert)
std::swap(pcSym, target);
- if (pcSym->isec == isec) {
+ if (pcSym->isec() == isec) {
if (pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset)
fatal("invalid FDE relocation in __eh_frame");
} else {
@@ -1420,7 +1420,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
// We already have an explicit relocation for the CIE offset.
cieIsec =
targetSymFromCanonicalSubtractor</*Invert=*/true>(isec, cieOffRelocIt)
- ->isec;
+ ->isec();
dataOff += sizeof(uint32_t);
} else {
// If we haven't found a relocation, then the CIE offset is most likely
@@ -1480,15 +1480,15 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
// to register the unwind entry under same symbol.
// This is not particularly efficient, but we should run into this case
// infrequently (only when handling the output of `ld -r`).
- if (funcSym->isec)
- funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec),
+ if (funcSym->isec())
+ funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec()),
funcSym->value);
} else {
funcSym = findSymbolAtAddress(sections, funcAddr);
ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize);
}
// The symbol has been coalesced, or already has a compact unwind entry.
- if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry) {
+ if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry()) {
// We must prune unused FDEs for correctness, so we cannot rely on
// -dead_strip being enabled.
isec->live = false;
@@ -1497,7 +1497,8 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
InputSection *lsdaIsec = nullptr;
if (lsdaAddrRelocIt != isec->relocs.end()) {
- lsdaIsec = targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec;
+ lsdaIsec =
+ targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec();
} else if (lsdaAddrOpt) {
uint64_t lsdaAddr = *lsdaAddrOpt;
Section *sec = findContainingSection(sections, &lsdaAddr);
@@ -1507,7 +1508,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
}
fdes[isec] = {funcLength, cie.personalitySymbol, lsdaIsec};
- funcSym->unwindEntry = isec;
+ funcSym->originalUnwindEntry = isec;
ehRelocator.commit();
}
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 5c1e07cd21b1fb..904701731684b3 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -194,10 +194,8 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
copy->live = false;
copy->wasCoalesced = true;
copy->replacement = this;
- for (auto &copySym : copy->symbols) {
+ for (auto &copySym : copy->symbols)
copySym->wasIdenticalCodeFolded = true;
- copySym->size = 0;
- }
symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end());
copy->symbols.clear();
@@ -207,7 +205,7 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
return;
for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) {
assert((*it)->value == 0);
- (*it)->unwindEntry = nullptr;
+ (*it)->originalUnwindEntry = nullptr;
}
}
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 2a31a5c09cdd22..5bcaeca48da2a2 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -77,8 +77,8 @@ static MapInfo gatherMapInfo() {
// Only emit the prevailing definition of a symbol. Also, don't emit
// the symbol if it is part of a cstring section (we use the literal
// value instead, similar to ld64)
- if (d->isec && d->getFile() == file &&
- !isa<CStringInputSection>(d->isec)) {
+ if (d->isec() && d->getFile() == file &&
+ !isa<CStringInputSection>(d->isec())) {
isReferencedFile = true;
if (!d->isLive())
info.deadSymbols.push_back(d);
@@ -155,6 +155,12 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
target->wordSize, sym->getName().str().data());
}
+static uint64_t getSymSizeForMap(Defined *sym) {
+ if (sym->wasIdenticalCodeFolded)
+ return 0;
+ return sym->size;
+}
+
void macho::writeMapFile() {
if (config->mapFile.empty())
return;
@@ -201,9 +207,10 @@ void macho::writeMapFile() {
auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
for (const ConcatInputSection *isec : arr) {
for (Defined *sym : isec->symbols) {
- if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
+ if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0))
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
- sym->size, readerToFileOrdinal[sym->getFile()],
+ getSymSizeForMap(sym),
+ readerToFileOrdinal[sym->getFile()],
sym->getName().str().data());
}
}
@@ -255,7 +262,7 @@ void macho::writeMapFile() {
os << "# \tSize \tFile Name\n";
for (Defined *sym : info.deadSymbols) {
assert(!sym->isLive());
- os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
+ os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", getSymSizeForMap(sym),
readerToFileOrdinal[sym->getFile()],
sym->getName().str().data());
}
diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp
index a37213d5613afb..c26c3aa321197e 100644
--- a/lld/MachO/MarkLive.cpp
+++ b/lld/MachO/MarkLive.cpp
@@ -110,10 +110,10 @@ void MarkLiveImpl<RecordWhyLive>::addSym(
if (!config->whyLive.empty() && config->whyLive.match(s->getName()))
printWhyLive(s, prev);
if (auto *d = dyn_cast<Defined>(s)) {
- if (d->isec)
- enqueue(d->isec, d->value, prev);
- if (d->unwindEntry)
- enqueue(d->unwindEntry, 0, prev);
+ if (d->isec())
+ enqueue(d->isec(), d->value, prev);
+ if (d->unwindEntry())
+ enqueue(d->unwindEntry(), 0, prev);
}
}
@@ -179,7 +179,7 @@ void MarkLiveImpl<RecordWhyLive>::markTransitively() {
if (s->isLive()) {
InputSection *referentIsec = nullptr;
if (auto *d = dyn_cast<Defined>(s))
- referentIsec = d->isec;
+ referentIsec = d->isec();
enqueue(isec, 0, makeEntry(referentIsec, nullptr));
}
} else {
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 5902b82d30f556..6d5d35809ba5a1 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -191,8 +191,8 @@ static StringRef getReferentString(const Reloc &r) {
if (auto *isec = r.referent.dyn_cast<InputSection *>())
return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);
auto *sym = cast<Defined>(r.referent.get<Symbol *>());
- return cast<CStringInputSection>(sym->isec)->getStringRefAtOffset(sym->value +
- r.addend);
+ return cast<CStringInputSection>(sym->isec())
+ ->getStringRefAtOffset(sym->value + r.addend);
}
void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
@@ -306,7 +306,7 @@ void ObjcCategoryChecker::parseClass(const Defined *classSym) {
return nullptr;
};
- const auto *classIsec = cast<ConcatInputSection>(classSym->isec);
+ const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
// Parse instance methods.
if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
@@ -314,7 +314,7 @@ void ObjcCategoryChecker::parseClass(const Defined *classSym) {
MK_Instance);
// Class methods are contained in the metaclass.
- if (const auto *r = classSym->isec->getRelocAt(classLayout.metaClassOffset))
+ if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
if (const auto *classMethodsIsec = getMethodsIsec(
cast<ConcatInputSection>(r->getReferentInputSection())))
parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
@@ -561,9 +561,9 @@ void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
if (!sym)
return;
- if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec))
+ if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
eraseISec(cisec);
- else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec)) {
+ else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
uint32_t totalOffset = sym->value + reloc->addend;
StringPiece &piece = csisec->getStringPiece(totalOffset);
piece.live = false;
@@ -588,7 +588,7 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
assert(catNameSym && "Category does not have a valid name Symbol");
collectSectionWriteInfoFromIsec<CStringSection>(
- catNameSym->isec, infoCategoryWriter.catNameInfo);
+ catNameSym->isec(), infoCategoryWriter.catNameInfo);
}
// Collect writer info from all the category lists (we're assuming they all
@@ -599,7 +599,7 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
if (Defined *ptrList =
tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
collectSectionWriteInfoFromIsec<ConcatOutputSection>(
- ptrList->isec, infoCategoryWriter.catPtrListInfo);
+ ptrList->isec(), infoCategoryWriter.catPtrListInfo);
// we've successfully collected data, so we can break
break;
}
@@ -627,7 +627,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
// platform pointer size, but to simplify implementation we always just read
// the lower 32b which should be good enough.
uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
- ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
+ ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
ptrList.structCount += protocolCount;
ptrList.structSize = target->wordSize;
@@ -636,7 +636,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
(protocolCount * target->wordSize) +
/*header(count)*/ protocolListHeaderLayout.totalSize +
/*extra null value*/ target->wordSize;
- assert(expectedListSize == ptrListSym->isec->data.size() &&
+ assert(expectedListSize == ptrListSym->isec()->data.size() &&
"Protocol list does not match expected size");
// Suppress unsuded var warning
@@ -644,7 +644,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
uint32_t off = protocolListHeaderLayout.totalSize;
for (uint32_t inx = 0; inx < protocolCount; ++inx) {
- const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
+ const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
assert(reloc && "No reloc found at protocol list offset");
auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
@@ -653,7 +653,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
ptrList.allPtrs.push_back(listSym);
off += target->wordSize;
}
- assert((ptrListSym->isec->getRelocAt(off) == nullptr) &&
+ assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
"expected null terminating protocol");
assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
"Protocol list end offset does not match expected size");
@@ -678,9 +678,9 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
assert(ptrListSym && "Reloc does not have a valid Defined");
uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
- ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
+ ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
- ptrListSym->isec->data.data() + listHeaderLayout.structCountOffset);
+ ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
@@ -690,12 +690,12 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
uint32_t expectedListSize =
listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
- assert(expectedListSize == ptrListSym->isec->data.size() &&
+ assert(expectedListSize == ptrListSym->isec()->data.size() &&
"Pointer list does not match expected size");
for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
off += target->wordSize) {
- const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
+ const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
assert(reloc && "No reloc found at pointer list offset");
auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
@@ -1054,7 +1054,7 @@ void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
r.offset = offset;
r.addend = 0;
r.referent = const_cast<Symbol *>(refTo);
- refFrom->isec->relocs.push_back(r);
+ refFrom->isec()->relocs.push_back(r);
}
void ObjcCategoryMerger::collectAndValidateCategoriesData() {
@@ -1076,7 +1076,7 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
if (!categorySym->getName().starts_with(objc::symbol_names::category))
continue;
- auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec);
+ auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
assert(catBodyIsec &&
"Category data section is not an ConcatInputSection");
diff --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp
index 4e840c6912cc57..afe7f454e6a230 100644
--- a/lld/MachO/Relocations.cpp
+++ b/lld/MachO/Relocations.cpp
@@ -24,7 +24,7 @@ static_assert(sizeof(void *) != 8 || sizeof(Reloc) == 24,
InputSection *Reloc::getReferentInputSection() const {
if (const auto *sym = referent.dyn_cast<Symbol *>()) {
if (const auto *d = dyn_cast<Defined>(sym))
- return d->isec;
+ return d->isec();
return nullptr;
} else {
return referent.get<InputSection *>();
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 976ea03980e963..907aee29d2386f 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -236,7 +236,7 @@ DenseMap<const InputSection *, size_t> CallGraphSort::run() {
// section.
for (Symbol *sym : isec->getFile()->symbols) {
if (auto *d = dyn_cast_or_null<Defined>(sym)) {
- if (d->isec == isec)
+ if (d->isec() == isec)
os << sym->getName() << "\n";
}
}
@@ -258,7 +258,7 @@ macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
if (it == priorities.end())
return std::nullopt;
const SymbolPriorityEntry &entry = it->second;
- const InputFile *f = sym->isec->...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/88357
More information about the llvm-commits
mailing list