[lld] a9353db - [lld-macho] Simplify the handling of "no unwind info" functions
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 26 13:04:49 PDT 2021
Author: Jez Ng
Date: 2021-10-26T16:04:16-04:00
New Revision: a9353dbe517c522ac63818de2cde645acdc46834
URL: https://github.com/llvm/llvm-project/commit/a9353dbe517c522ac63818de2cde645acdc46834
DIFF: https://github.com/llvm/llvm-project/commit/a9353dbe517c522ac63818de2cde645acdc46834.diff
LOG: [lld-macho] Simplify the handling of "no unwind info" functions
This diff does away with `addEntriesForFunctionsWithoutUnwindInfo()`,
because `addSymbol()` can now determine which functions need those
entries.
While overhauling UnwindInfoSection, I also parallelized the relocation
of the contents of the CUEs. This somewhat offsets the time regression
from creating one InputSection per CUE (which was done in D109944).
Reviewed By: #lld-macho, oontvoo
Differential Revision: https://reviews.llvm.org/D109945
Added:
Modified:
lld/MachO/UnwindInfoSection.cpp
lld/MachO/UnwindInfoSection.h
lld/MachO/Writer.cpp
Removed:
################################################################################
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 6ffd35ffd033..dca537493de4 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/Parallel.h"
using namespace llvm;
using namespace llvm::MachO;
@@ -92,6 +93,14 @@ using namespace lld::macho;
// TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
// TODO(gkm): how do we align the 2nd-level pages?
+template <class Ptr> struct CompactUnwindEntry {
+ Ptr functionAddress;
+ uint32_t functionLength;
+ compact_unwind_encoding_t encoding;
+ Ptr personality;
+ Ptr lsda;
+};
+
using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
struct SecondLevelPage {
@@ -107,7 +116,7 @@ template <class Ptr>
class UnwindInfoSectionImpl final : public UnwindInfoSection {
public:
void prepareRelocations(ConcatInputSection *) override;
- void addSymbol(const Defined *) override;
+ void relocateCompactUnwind(std::vector<CompactUnwindEntry<Ptr>> &);
void finalize() override;
void writeTo(uint8_t *buf) const override;
@@ -131,20 +140,39 @@ class UnwindInfoSectionImpl final : public UnwindInfoSection {
UnwindInfoSection::UnwindInfoSection()
: SyntheticSection(segment_names::text, section_names::unwindInfo) {
align = 4;
- compactUnwindSection =
- make<ConcatOutputSection>(section_names::compactUnwind);
}
void UnwindInfoSection::prepareRelocations() {
- for (ConcatInputSection *isec : compactUnwindSection->inputs)
- prepareRelocations(isec);
+ // This iteration needs to be deterministic, since prepareRelocations may add
+ // entries to the GOT. Hence the use of a MapVector for
+ // UnwindInfoSection::symbols.
+ for (const Defined *d : make_second_range(symbols))
+ if (d->compactUnwind)
+ prepareRelocations(d->compactUnwind);
}
-template <class Ptr>
-void UnwindInfoSectionImpl<Ptr>::addSymbol(const Defined *d) {
- if (d->compactUnwind) {
- d->compactUnwind->parent = compactUnwindSection;
- compactUnwindSection->addInput(d->compactUnwind);
+// Record function symbols that may need entries emitted in __unwind_info, which
+// stores unwind data for address ranges.
+//
+// Note that if several adjacent functions have the same unwind encoding, LSDA,
+// and personality function, they share one unwind entry. For this to work,
+// functions without unwind info need explicit "no unwind info" unwind entries
+// -- else the unwinder would think they have the unwind info of the closest
+// function with unwind info right before in the image. Thus, we add function
+// symbols for each unique address regardless of whether they have associated
+// unwind info.
+void UnwindInfoSection::addSymbol(const Defined *d) {
+ if (d->compactUnwind)
+ allEntriesAreOmitted = false;
+ // We don't yet know the final output address of this symbol, but we know that
+ // they are uniquely determined by a combination of the isec and value, so
+ // we use that as the key here.
+ auto p = symbols.insert({{d->isec, d->value}, d});
+ // If we have multiple symbols at the same address, only one of them can have
+ // an associated CUE.
+ if (!p.second && d->compactUnwind) {
+ assert(!p.first->second->compactUnwind);
+ p.first->second = d;
}
}
@@ -167,18 +195,6 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
Reloc &r = isec->relocs[i];
assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
- if (r.offset % sizeof(CompactUnwindEntry<Ptr>) == 0) {
- InputSection *referentIsec;
- if (auto *isec = r.referent.dyn_cast<InputSection *>())
- referentIsec = isec;
- else
- referentIsec = cast<Defined>(r.referent.dyn_cast<Symbol *>())->isec;
-
- if (!cast<ConcatInputSection>(referentIsec)->shouldOmitFromOutput())
- allEntriesAreOmitted = false;
- continue;
- }
-
if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
offsetof(CompactUnwindEntry<Ptr>, personality))
continue;
@@ -260,17 +276,21 @@ static ConcatInputSection *checkTextSegment(InputSection *isec) {
// relocations here: since we are not emitting the pre-link CU section, there
// is no source address to make a relative location meaningful.
template <class Ptr>
-static void
-relocateCompactUnwind(ConcatOutputSection *compactUnwindSection,
- std::vector<CompactUnwindEntry<Ptr>> &cuVector) {
- for (const ConcatInputSection *isec : compactUnwindSection->inputs) {
- assert(isec->parent == compactUnwindSection);
-
- uint8_t *buf =
- reinterpret_cast<uint8_t *>(cuVector.data()) + isec->outSecOff;
- memcpy(buf, isec->data.data(), isec->data.size());
-
- for (const Reloc &r : isec->relocs) {
+void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
+ std::vector<CompactUnwindEntry<Ptr>> &cuVector) {
+ auto symbolsVec = symbols.takeVector();
+ parallelForEachN(0, symbolsVec.size(), [&](size_t i) {
+ uint8_t *buf = reinterpret_cast<uint8_t *>(cuVector.data()) +
+ i * sizeof(CompactUnwindEntry<Ptr>);
+ const Defined *d = symbolsVec[i].second;
+ // Write the functionAddress.
+ writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2);
+ if (!d->compactUnwind)
+ return;
+
+ // Write the rest of the CUE.
+ memcpy(buf, d->compactUnwind->data.data(), d->compactUnwind->data.size());
+ for (const Reloc &r : d->compactUnwind->relocs) {
uint64_t referentVA = 0;
if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
if (!isa<Undefined>(referentSym)) {
@@ -289,7 +309,7 @@ relocateCompactUnwind(ConcatOutputSection *compactUnwindSection,
}
writeAddress(buf + r.offset, referentVA, r.length);
}
- }
+ });
}
// There should only be a handful of unique personality pointers, so we can
@@ -319,39 +339,6 @@ encodePersonalities(const std::vector<CompactUnwindEntry<Ptr> *> &cuPtrVector,
") for compact unwind to encode");
}
-// __unwind_info stores unwind data for address ranges. If several
-// adjacent functions have the same unwind encoding, LSDA, and personality
-// function, they share one unwind entry. For this to work, functions without
-// unwind info need explicit "no unwind info" unwind entries -- else the
-// unwinder would think they have the unwind info of the closest function
-// with unwind info right before in the image.
-template <class Ptr>
-static void addEntriesForFunctionsWithoutUnwindInfo(
- std::vector<CompactUnwindEntry<Ptr>> &cuVector) {
- DenseSet<Ptr> hasUnwindInfo;
- for (CompactUnwindEntry<Ptr> &cuEntry : cuVector)
- hasUnwindInfo.insert(cuEntry.functionAddress);
-
- // Add explicit "has no unwind info" entries for all global and local symbols
- // without unwind info.
- auto markNoUnwindInfo = [&cuVector, &hasUnwindInfo](const Defined *d) {
- if (d->isLive() && d->isec && isCodeSection(d->isec)) {
- Ptr ptr = d->getVA();
- if (!hasUnwindInfo.count(ptr))
- cuVector.push_back({ptr, 0, 0, 0, 0});
- }
- };
- for (Symbol *sym : symtab->getSymbols())
- if (auto *d = dyn_cast<Defined>(sym))
- markNoUnwindInfo(d);
- for (const InputFile *file : inputFiles)
- if (auto *objFile = dyn_cast<ObjFile>(file))
- for (Symbol *sym : objFile->symbols)
- if (auto *d = dyn_cast_or_null<Defined>(sym))
- if (!d->isExternal())
- markNoUnwindInfo(d);
-}
-
static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
// From compact_unwind_encoding.h:
// UNWIND_X86_64_MODE_STACK_IND:
@@ -380,7 +367,7 @@ static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
// Scan the __LD,__compact_unwind entries and compute the space needs of
// __TEXT,__unwind_info and __TEXT,__eh_frame
template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
- if (compactUnwindSection == nullptr)
+ if (symbols.empty())
return;
// At this point, the address space for __TEXT,__text has been
@@ -390,15 +377,8 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
// we can fold adjacent CU entries with identical
// encoding+personality+lsda. Folding is necessary because it reduces
// the number of CU entries by as much as 3 orders of magnitude!
- compactUnwindSection->finalize();
- assert(compactUnwindSection->getSize() % sizeof(CompactUnwindEntry<Ptr>) ==
- 0);
- size_t cuCount =
- compactUnwindSection->getSize() / sizeof(CompactUnwindEntry<Ptr>);
- cuVector.resize(cuCount);
- relocateCompactUnwind(compactUnwindSection, cuVector);
-
- addEntriesForFunctionsWithoutUnwindInfo(cuVector);
+ cuVector.resize(symbols.size());
+ relocateCompactUnwind(cuVector);
// Rather than sort & fold the 32-byte entries directly, we create a
// vector of pointers to entries and sort & fold that instead.
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 864f2db29dfa..f0d4690b94f9 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -11,37 +11,29 @@
#include "ConcatOutputSection.h"
#include "SyntheticSections.h"
+#include "llvm/ADT/MapVector.h"
#include "mach-o/compact_unwind_encoding.h"
namespace lld {
namespace macho {
-template <class Ptr> struct CompactUnwindEntry {
- Ptr functionAddress;
- uint32_t functionLength;
- compact_unwind_encoding_t encoding;
- Ptr personality;
- Ptr lsda;
-};
-
class UnwindInfoSection : public SyntheticSection {
public:
- bool isNeeded() const override {
- return !compactUnwindSection->inputs.empty() && !allEntriesAreOmitted;
- }
+ // If all functions are free of unwind info, we can omit the unwind info
+ // section entirely.
+ bool isNeeded() const override { return !allEntriesAreOmitted; }
uint64_t getSize() const override { return unwindInfoSize; }
- virtual void addSymbol(const Defined *) = 0;
- std::vector<ConcatInputSection *> getInputs() {
- return compactUnwindSection->inputs;
- }
+ void addSymbol(const Defined *);
void prepareRelocations();
protected:
UnwindInfoSection();
virtual void prepareRelocations(ConcatInputSection *) = 0;
- ConcatOutputSection *compactUnwindSection;
+ llvm::MapVector<std::pair<const InputSection *, uint64_t /*Defined::value*/>,
+ const Defined *>
+ symbols;
uint64_t unwindInfoSize = 0;
bool allEntriesAreOmitted = true;
};
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index a53b2b68f4bd..27ad7b2802ba 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -677,7 +677,8 @@ void Writer::scanSymbols() {
continue;
if (defined->overridesWeakDef)
in.weakBinding->addNonWeakDefinition(defined);
- in.unwindInfo->addSymbol(defined);
+ if (!defined->isAbsolute() && isCodeSection(defined->isec))
+ in.unwindInfo->addSymbol(defined);
} else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
// This branch intentionally doesn't check isLive().
if (dysym->isDynamicLookup())
@@ -691,7 +692,8 @@ void Writer::scanSymbols() {
if (auto *objFile = dyn_cast<ObjFile>(file))
for (Symbol *sym : objFile->symbols) {
if (auto *defined = dyn_cast_or_null<Defined>(sym))
- if (!defined->isExternal() && defined->isLive())
+ if (!defined->isExternal() && defined->isLive() &&
+ !defined->isAbsolute() && isCodeSection(defined->isec))
in.unwindInfo->addSymbol(defined);
}
}
More information about the llvm-commits
mailing list