[lld] [LLD, MachO] Add `getUnwindInfoEncodingHash` to `BPSectionOrderer.cpp`. (PR #105587)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 21 14:14:05 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld
@llvm/pr-subscribers-lld-macho
Author: Peter Rong (DataCorrupted)
<details>
<summary>Changes</summary>
We have seen that the order of UnwindInfo can change the size of the final binary [[1](https://discourse.llvm.org/t/some-questions-about-profile-guided-function-order-via-temporal-profiling-such-as-binary-size-regression/80513/6?u=ellishg)]. To take advantage of that, we add a hash of each section's unwind info (computed after relocation) to the hashes that `BPSectionOrderer` builds when constructing nodes for compression.
@<!-- -->ellishg Note: `lsda` and `personality` could already be set during the actual relocation; we can't capture that during hashing.
---
Full diff: https://github.com/llvm/llvm-project/pull/105587.diff
3 Files Affected:
- (modified) lld/MachO/BPSectionOrderer.cpp (+22)
- (modified) lld/MachO/UnwindInfoSection.cpp (+48-47)
- (modified) lld/MachO/UnwindInfoSection.h (+11)
``````````diff
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 568843d72bbb50..350b7498a03ce1 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -8,6 +8,7 @@
#include "BPSectionOrderer.h"
#include "InputSection.h"
+#include "UnwindInfoSection.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
@@ -60,6 +61,25 @@ getRelocHash(const Reloc &reloc,
return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
}
+static uint64_t getUnwindInfoEncodingHash(const InputSection *isec) {
+ for (Symbol *sym : isec->symbols) {
+ if (auto *d = dyn_cast_or_null<Defined>(sym)) {
+ if (auto *ue = d->unwindEntry()) {
+ CompactUnwindEntry cu;
+ cu.relocateOneCompactUnwindEntry(d);
+ if (cu.lsda)
+ return xxHash64("HAS LSDA");
+ StringRef name = cu.personality ? cu.personality->getName().empty()
+ ? "<unnamed>"
+ : cu.personality->getName()
+ : "<none>";
+ return xxHash64((name + ";" + Twine::utohexstr(cu.encoding)).str());
+ }
+ }
+ }
+ return 0;
+}
+
static void constructNodesForCompression(
const SmallVector<const InputSection *> &sections,
const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
@@ -76,6 +96,8 @@ static void constructNodesForCompression(
const auto *isec = sections[sectionIdx];
constexpr unsigned windowSize = 4;
+ hashes.push_back(getUnwindInfoEncodingHash(isec));
+
for (size_t i = 0; i < isec->data.size(); i++) {
auto window = isec->data.drop_front(i).take_front(windowSize);
hashes.push_back(xxHash64(window));
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 7033481d6014b5..4f55af29b33140 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -109,14 +109,53 @@ CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD);
#undef FOR_EACH_CU_FIELD
-// LLD's internal representation of a compact unwind entry.
-struct CompactUnwindEntry {
- uint64_t functionAddress;
- uint32_t functionLength;
- compact_unwind_encoding_t encoding;
- Symbol *personality;
- InputSection *lsda;
-};
+void lld::macho::CompactUnwindEntry::relocateOneCompactUnwindEntry(
+ const Defined *d) {
+ ConcatInputSection *unwindEntry = d->unwindEntry();
+ assert(unwindEntry);
+
+ functionAddress = d->getVA();
+ // If we have DWARF unwind info, create a slimmed-down CU entry that points
+ // to it.
+ if (unwindEntry->getName() == section_names::ehFrame) {
+ // The unwinder will look for the DWARF entry starting at the hint,
+ // assuming the hint points to a valid CFI record start. If it
+ // fails to find the record, it proceeds in a linear search through the
+ // contiguous CFI records from the hint until the end of the section.
+ // Ideally, in the case where the offset is too large to be encoded, we
+ // would instead encode the largest possible offset to a valid CFI record,
+ // but since we don't keep track of that, just encode zero -- the start of
+ // the section is always the start of a CFI record.
+ uint64_t dwarfOffsetHint =
+ d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
+ ? d->unwindEntry()->outSecOff
+ : 0;
+ encoding = target->modeDwarfEncoding | dwarfOffsetHint;
+ const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
+ functionLength = fde.funcLength;
+ // Omit the DWARF personality from compact-unwind entry so that we
+ // don't need to encode it.
+ personality = nullptr;
+ lsda = fde.lsda;
+ return;
+ }
+
+ assert(unwindEntry->getName() == section_names::compactUnwind);
+
+ CompactUnwindLayout cuLayout(target->wordSize);
+ auto buf = reinterpret_cast<const uint8_t *>(unwindEntry->data.data()) -
+ target->wordSize;
+ functionLength =
+ support::endian::read32le(buf + cuLayout.functionLengthOffset);
+ encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
+ for (const Reloc &r : unwindEntry->relocs) {
+ if (r.offset == cuLayout.personalityOffset)
+ personality = r.referent.get<Symbol *>();
+ else if (r.offset == cuLayout.lsdaOffset)
+ lsda = r.getReferentInputSection();
+ }
+ return;
+}
using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
@@ -355,45 +394,7 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
if (!d->unwindEntry())
return;
- // If we have DWARF unwind info, create a slimmed-down CU entry that points
- // to it.
- if (d->unwindEntry()->getName() == section_names::ehFrame) {
- // The unwinder will look for the DWARF entry starting at the hint,
- // assuming the hint points to a valid CFI record start. If it
- // fails to find the record, it proceeds in a linear search through the
- // contiguous CFI records from the hint until the end of the section.
- // Ideally, in the case where the offset is too large to be encoded, we
- // would instead encode the largest possible offset to a valid CFI record,
- // but since we don't keep track of that, just encode zero -- the start of
- // the section is always the start of a CFI record.
- uint64_t dwarfOffsetHint =
- d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
- ? d->unwindEntry()->outSecOff
- : 0;
- cu.encoding = target->modeDwarfEncoding | dwarfOffsetHint;
- const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
- cu.functionLength = fde.funcLength;
- // Omit the DWARF personality from compact-unwind entry so that we
- // don't need to encode it.
- cu.personality = nullptr;
- cu.lsda = fde.lsda;
- return;
- }
-
- assert(d->unwindEntry()->getName() == section_names::compactUnwind);
-
- auto buf =
- reinterpret_cast<const uint8_t *>(d->unwindEntry()->data.data()) -
- target->wordSize;
- cu.functionLength =
- support::endian::read32le(buf + cuLayout.functionLengthOffset);
- cu.encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
- for (const Reloc &r : d->unwindEntry()->relocs) {
- if (r.offset == cuLayout.personalityOffset)
- cu.personality = r.referent.get<Symbol *>();
- else if (r.offset == cuLayout.lsdaOffset)
- cu.lsda = r.getReferentInputSection();
- }
+ cu.relocateOneCompactUnwindEntry(d);
});
}
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 826573b0c44a00..1cdfa3b3d02753 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -34,6 +34,17 @@ class UnwindInfoSection : public SyntheticSection {
UnwindInfoSection *makeUnwindInfoSection();
+// LLD's internal representation of a compact unwind entry.
+struct CompactUnwindEntry {
+ uint64_t functionAddress;
+ uint32_t functionLength;
+ compact_unwind_encoding_t encoding;
+ Symbol *personality;
+ InputSection *lsda;
+
+ void relocateOneCompactUnwindEntry(const Defined *d);
+};
+
} // namespace lld::macho
#endif
``````````
</details>
https://github.com/llvm/llvm-project/pull/105587
More information about the llvm-commits mailing list