[lld] [LLD, MachO] Add `getUnwindInfoEncodingHash` to `BPSectionOrderer.cpp`. (PR #105587)
Peter Rong via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 10:45:21 PDT 2024
https://github.com/DataCorrupted updated https://github.com/llvm/llvm-project/pull/105587
>From 2c43e32d892963a9124426584cd4ea71fabbeb67 Mon Sep 17 00:00:00 2001
From: Peter Rong <PeterRong at meta.com>
Date: Wed, 21 Aug 2024 14:07:27 -0700
Subject: [PATCH 1/4] [LLD, MachO] Add `getUnwindInfoEncodingHash` to
`BPSectionOrderer.cpp`.
We have seen that the order of UnwindInfo can change the size of the final binary.
To harness that, we add the hash of unwind info (after relocation) to the BPSectionOrderer.
Signed-off-by: Peter Rong <PeterRong at meta.com>
---
lld/MachO/BPSectionOrderer.cpp | 22 ++++++++
lld/MachO/UnwindInfoSection.cpp | 95 +++++++++++++++++----------------
lld/MachO/UnwindInfoSection.h | 11 ++++
3 files changed, 81 insertions(+), 47 deletions(-)
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 568843d72bbb50..350b7498a03ce1 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -8,6 +8,7 @@
#include "BPSectionOrderer.h"
#include "InputSection.h"
+#include "UnwindInfoSection.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
@@ -60,6 +61,25 @@ getRelocHash(const Reloc &reloc,
return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
}
+static uint64_t getUnwindInfoEncodingHash(const InputSection *isec) {
+ for (Symbol *sym : isec->symbols) {
+ if (auto *d = dyn_cast_or_null<Defined>(sym)) {
+ if (auto *ue = d->unwindEntry()) {
+ CompactUnwindEntry cu;
+ cu.relocateOneCompactUnwindEntry(d);
+ if (cu.lsda)
+ return xxHash64("HAS LSDA");
+ StringRef name = cu.personality ? cu.personality->getName().empty()
+ ? "<unnamed>"
+ : cu.personality->getName()
+ : "<none>";
+ return xxHash64((name + ";" + Twine::utohexstr(cu.encoding)).str());
+ }
+ }
+ }
+ return 0;
+}
+
static void constructNodesForCompression(
const SmallVector<const InputSection *> &sections,
const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
@@ -76,6 +96,8 @@ static void constructNodesForCompression(
const auto *isec = sections[sectionIdx];
constexpr unsigned windowSize = 4;
+ hashes.push_back(getUnwindInfoEncodingHash(isec));
+
for (size_t i = 0; i < isec->data.size(); i++) {
auto window = isec->data.drop_front(i).take_front(windowSize);
hashes.push_back(xxHash64(window));
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 7033481d6014b5..4f55af29b33140 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -109,14 +109,53 @@ CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD);
#undef FOR_EACH_CU_FIELD
-// LLD's internal representation of a compact unwind entry.
-struct CompactUnwindEntry {
- uint64_t functionAddress;
- uint32_t functionLength;
- compact_unwind_encoding_t encoding;
- Symbol *personality;
- InputSection *lsda;
-};
+void lld::macho::CompactUnwindEntry::relocateOneCompactUnwindEntry(
+ const Defined *d) {
+ ConcatInputSection *unwindEntry = d->unwindEntry();
+ assert(unwindEntry);
+
+ functionAddress = d->getVA();
+ // If we have DWARF unwind info, create a slimmed-down CU entry that points
+ // to it.
+ if (unwindEntry->getName() == section_names::ehFrame) {
+ // The unwinder will look for the DWARF entry starting at the hint,
+ // assuming the hint points to a valid CFI record start. If it
+ // fails to find the record, it proceeds in a linear search through the
+ // contiguous CFI records from the hint until the end of the section.
+ // Ideally, in the case where the offset is too large to be encoded, we
+ // would instead encode the largest possible offset to a valid CFI record,
+ // but since we don't keep track of that, just encode zero -- the start of
+ // the section is always the start of a CFI record.
+ uint64_t dwarfOffsetHint =
+ d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
+ ? d->unwindEntry()->outSecOff
+ : 0;
+ encoding = target->modeDwarfEncoding | dwarfOffsetHint;
+ const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
+ functionLength = fde.funcLength;
+ // Omit the DWARF personality from compact-unwind entry so that we
+ // don't need to encode it.
+ personality = nullptr;
+ lsda = fde.lsda;
+ return;
+ }
+
+ assert(unwindEntry->getName() == section_names::compactUnwind);
+
+ CompactUnwindLayout cuLayout(target->wordSize);
+ auto buf = reinterpret_cast<const uint8_t *>(unwindEntry->data.data()) -
+ target->wordSize;
+ functionLength =
+ support::endian::read32le(buf + cuLayout.functionLengthOffset);
+ encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
+ for (const Reloc &r : unwindEntry->relocs) {
+ if (r.offset == cuLayout.personalityOffset)
+ personality = r.referent.get<Symbol *>();
+ else if (r.offset == cuLayout.lsdaOffset)
+ lsda = r.getReferentInputSection();
+ }
+ return;
+}
using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
@@ -355,45 +394,7 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
if (!d->unwindEntry())
return;
- // If we have DWARF unwind info, create a slimmed-down CU entry that points
- // to it.
- if (d->unwindEntry()->getName() == section_names::ehFrame) {
- // The unwinder will look for the DWARF entry starting at the hint,
- // assuming the hint points to a valid CFI record start. If it
- // fails to find the record, it proceeds in a linear search through the
- // contiguous CFI records from the hint until the end of the section.
- // Ideally, in the case where the offset is too large to be encoded, we
- // would instead encode the largest possible offset to a valid CFI record,
- // but since we don't keep track of that, just encode zero -- the start of
- // the section is always the start of a CFI record.
- uint64_t dwarfOffsetHint =
- d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
- ? d->unwindEntry()->outSecOff
- : 0;
- cu.encoding = target->modeDwarfEncoding | dwarfOffsetHint;
- const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
- cu.functionLength = fde.funcLength;
- // Omit the DWARF personality from compact-unwind entry so that we
- // don't need to encode it.
- cu.personality = nullptr;
- cu.lsda = fde.lsda;
- return;
- }
-
- assert(d->unwindEntry()->getName() == section_names::compactUnwind);
-
- auto buf =
- reinterpret_cast<const uint8_t *>(d->unwindEntry()->data.data()) -
- target->wordSize;
- cu.functionLength =
- support::endian::read32le(buf + cuLayout.functionLengthOffset);
- cu.encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
- for (const Reloc &r : d->unwindEntry()->relocs) {
- if (r.offset == cuLayout.personalityOffset)
- cu.personality = r.referent.get<Symbol *>();
- else if (r.offset == cuLayout.lsdaOffset)
- cu.lsda = r.getReferentInputSection();
- }
+ cu.relocateOneCompactUnwindEntry(d);
});
}
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 826573b0c44a00..1cdfa3b3d02753 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -34,6 +34,17 @@ class UnwindInfoSection : public SyntheticSection {
UnwindInfoSection *makeUnwindInfoSection();
+// LLD's internal representation of a compact unwind entry.
+struct CompactUnwindEntry {
+ uint64_t functionAddress;
+ uint32_t functionLength;
+ compact_unwind_encoding_t encoding;
+ Symbol *personality;
+ InputSection *lsda;
+
+ void relocateOneCompactUnwindEntry(const Defined *d);
+};
+
} // namespace lld::macho
#endif
>From 67e7f61bcfd6a1a2677dfa1214963064d7ebe806 Mon Sep 17 00:00:00 2001
From: Peter Rong <PeterRong at meta.com>
Date: Wed, 21 Aug 2024 14:21:50 -0700
Subject: [PATCH 2/4] refactor
Signed-off-by: Peter Rong <PeterRong at meta.com>
---
lld/MachO/BPSectionOrderer.cpp | 22 +++++++++++-----------
lld/MachO/UnwindInfoSection.cpp | 23 +++++++++--------------
2 files changed, 20 insertions(+), 25 deletions(-)
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 350b7498a03ce1..ec51eaf1c28260 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -64,17 +64,17 @@ getRelocHash(const Reloc &reloc,
static uint64_t getUnwindInfoEncodingHash(const InputSection *isec) {
for (Symbol *sym : isec->symbols) {
if (auto *d = dyn_cast_or_null<Defined>(sym)) {
- if (auto *ue = d->unwindEntry()) {
- CompactUnwindEntry cu;
- cu.relocateOneCompactUnwindEntry(d);
- if (cu.lsda)
- return xxHash64("HAS LSDA");
- StringRef name = cu.personality ? cu.personality->getName().empty()
- ? "<unnamed>"
- : cu.personality->getName()
- : "<none>";
- return xxHash64((name + ";" + Twine::utohexstr(cu.encoding)).str());
- }
+ if (!d->unwindEntry())
+ continue;
+ CompactUnwindEntry cu;
+ cu.relocateOneCompactUnwindEntry(d);
+ if (cu.lsda)
+ return xxHash64("HAS LSDA");
+ StringRef name = (cu.personality == nullptr) ? "<none>"
+ : cu.personality->getName().empty()
+ ? "<unnamed>"
+ : cu.personality->getName();
+ return xxHash64((name + ";" + Twine::utohexstr(cu.encoding)).str());
}
}
return 0;
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 4f55af29b33140..22f0ad502f5760 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -111,10 +111,12 @@ CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD);
void lld::macho::CompactUnwindEntry::relocateOneCompactUnwindEntry(
const Defined *d) {
+ functionAddress = d->getVA();
+
ConcatInputSection *unwindEntry = d->unwindEntry();
- assert(unwindEntry);
+ if (!unwindEntry)
+ return;
- functionAddress = d->getVA();
// If we have DWARF unwind info, create a slimmed-down CU entry that points
// to it.
if (unwindEntry->getName() == section_names::ehFrame) {
@@ -126,12 +128,11 @@ void lld::macho::CompactUnwindEntry::relocateOneCompactUnwindEntry(
// would instead encode the largest possible offset to a valid CFI record,
// but since we don't keep track of that, just encode zero -- the start of
// the section is always the start of a CFI record.
- uint64_t dwarfOffsetHint =
- d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
- ? d->unwindEntry()->outSecOff
- : 0;
+ uint64_t dwarfOffsetHint = unwindEntry->outSecOff <= DWARF_SECTION_OFFSET
+ ? unwindEntry->outSecOff
+ : 0;
encoding = target->modeDwarfEncoding | dwarfOffsetHint;
- const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
+ const FDE &fde = cast<ObjFile>(d->getFile())->fdes[unwindEntry];
functionLength = fde.funcLength;
// Omit the DWARF personality from compact-unwind entry so that we
// don't need to encode it.
@@ -388,13 +389,7 @@ Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) {
void UnwindInfoSectionImpl::relocateCompactUnwind(
std::vector<CompactUnwindEntry> &cuEntries) {
parallelFor(0, symbolsVec.size(), [&](size_t i) {
- CompactUnwindEntry &cu = cuEntries[i];
- const Defined *d = symbolsVec[i].second;
- cu.functionAddress = d->getVA();
- if (!d->unwindEntry())
- return;
-
- cu.relocateOneCompactUnwindEntry(d);
+ cuEntries[i].relocateOneCompactUnwindEntry(symbolsVec[i].second);
});
}
>From 9e9d15aa9eebe704b8536137900cb13aaa4c9d5f Mon Sep 17 00:00:00 2001
From: Peter Rong <PeterRong at meta.com>
Date: Wed, 21 Aug 2024 16:05:05 -0700
Subject: [PATCH 3/4] update tests and comments
Signed-off-by: Peter Rong <PeterRong at meta.com>
---
lld/MachO/BPSectionOrderer.cpp | 8 ++++++++
lld/MachO/UnwindInfoSection.h | 1 +
lld/test/MachO/bp-section-orderer-stress.s | 24 ++++++++++++++++++++++
3 files changed, 33 insertions(+)
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index ec51eaf1c28260..cec4100f8aceb6 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -61,6 +61,14 @@ getRelocHash(const Reloc &reloc,
return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
}
+// Get a hash of the unwind info (after relocation).
+// This hash is not 100% accurate, but it's good enough for compression.
+//
+// Unwind info will be eliminated if it is the same as its neighbors.
+// We want to order functions such that the ones with similar unwind info
+// can stay together.
+// See more here:
+// https://faultlore.com/blah/compact-unwinding/#page-tables
static uint64_t getUnwindInfoEncodingHash(const InputSection *isec) {
for (Symbol *sym : isec->symbols) {
if (auto *d = dyn_cast_or_null<Defined>(sym)) {
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 1cdfa3b3d02753..8e68522ed54bb8 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -42,6 +42,7 @@ struct CompactUnwindEntry {
Symbol *personality;
InputSection *lsda;
+ // Relocate the entry to the given Symbol.
void relocateOneCompactUnwindEntry(const Defined *d);
};
diff --git a/lld/test/MachO/bp-section-orderer-stress.s b/lld/test/MachO/bp-section-orderer-stress.s
index fdc6a20e2655b9..cfb40c6bea34f0 100644
--- a/lld/test/MachO/bp-section-orderer-stress.s
+++ b/lld/test/MachO/bp-section-orderer-stress.s
@@ -29,11 +29,15 @@ profiled_functions = function_names[: int(num_functions / 2)]
function_contents = [
f"""
{name}:
+ .cfi_startproc
+ .cfi_personality 155, _personality_{i % 5}
+ .cfi_lsda 16, _exception{i % 3}
add w0, w0, #{i % 4096}
add w1, w1, #{i % 10}
add w2, w0, #{i % 20}
adrp x3, {name}@PAGE
ret
+ .cfi_endproc
"""
for i, name in enumerate(function_names)
]
@@ -78,6 +82,26 @@ with open(assembly_filepath, "w") as f:
_main:
ret
+_personality_0:
+ ret
+_personality_1:
+ ret
+_personality_2:
+ ret
+_personality_3:
+ ret
+_personality_4:
+ ret
+
+_exception0:
+ .quad 0x4200
+
+_exception1:
+ .quad 0x4210
+
+_exception2:
+ .quad 0x4220
+
{"".join(function_contents)}
.data
>From b4f7ddfad7ada893ebc05145a1954a7e484608c8 Mon Sep 17 00:00:00 2001
From: Peter Rong <PeterRong at meta.com>
Date: Thu, 22 Aug 2024 10:44:57 -0700
Subject: [PATCH 4/4] personality can't be empty
Signed-off-by: Peter Rong <PeterRong at meta.com>
---
lld/MachO/BPSectionOrderer.cpp | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index cec4100f8aceb6..3ec8a585233088 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -78,10 +78,7 @@ static uint64_t getUnwindInfoEncodingHash(const InputSection *isec) {
cu.relocateOneCompactUnwindEntry(d);
if (cu.lsda)
return xxHash64("HAS LSDA");
- StringRef name = (cu.personality == nullptr) ? "<none>"
- : cu.personality->getName().empty()
- ? "<unnamed>"
- : cu.personality->getName();
+ StringRef name = (cu.personality) ? cu.personality->getName() : "<null>";
return xxHash64((name + ";" + Twine::utohexstr(cu.encoding)).str());
}
}
More information about the llvm-commits
mailing list