[lld] [LLD, MachO] Add `getUnwindInfoEncodingHash` to `BPSectionOrderer.cpp`. (PR #105587)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 21 14:14:05 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld

@llvm/pr-subscribers-lld-macho

Author: Peter Rong (DataCorrupted)

<details>
<summary>Changes</summary>

We have seen that the order of UnwindInfo can change the size of the final binary [[1](https://discourse.llvm.org/t/some-questions-about-profile-guided-function-order-via-temporal-profiling-such-as-binary-size-regression/80513/6?u=ellishg)]. To take advantage of that, we add a hash of each section's unwind info (computed after relocation) to the hashes that `BPSectionOrderer` uses when ordering sections for compression.

@<!-- -->ellishg Note: `lsda` and `personality` could already be set during the actual relocation; we can't capture that during hashing.

---
Full diff: https://github.com/llvm/llvm-project/pull/105587.diff


3 Files Affected:

- (modified) lld/MachO/BPSectionOrderer.cpp (+22) 
- (modified) lld/MachO/UnwindInfoSection.cpp (+48-47) 
- (modified) lld/MachO/UnwindInfoSection.h (+11) 


``````````diff
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 568843d72bbb50..350b7498a03ce1 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -8,6 +8,7 @@
 
 #include "BPSectionOrderer.h"
 #include "InputSection.h"
+#include "UnwindInfoSection.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
@@ -60,6 +61,25 @@ getRelocHash(const Reloc &reloc,
   return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
 }
 
+static uint64_t getUnwindInfoEncodingHash(const InputSection *isec) {
+  for (Symbol *sym : isec->symbols) {
+    if (auto *d = dyn_cast_or_null<Defined>(sym)) {
+      if (auto *ue = d->unwindEntry()) {
+        CompactUnwindEntry cu;
+        cu.relocateOneCompactUnwindEntry(d);
+        if (cu.lsda)
+          return xxHash64("HAS LSDA");
+        StringRef name = cu.personality ? cu.personality->getName().empty()
+                                              ? "<unnamed>"
+                                              : cu.personality->getName()
+                                        : "<none>";
+        return xxHash64((name + ";" + Twine::utohexstr(cu.encoding)).str());
+      }
+    }
+  }
+  return 0;
+}
+
 static void constructNodesForCompression(
     const SmallVector<const InputSection *> &sections,
     const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
@@ -76,6 +96,8 @@ static void constructNodesForCompression(
     const auto *isec = sections[sectionIdx];
     constexpr unsigned windowSize = 4;
 
+    hashes.push_back(getUnwindInfoEncodingHash(isec));
+
     for (size_t i = 0; i < isec->data.size(); i++) {
       auto window = isec->data.drop_front(i).take_front(windowSize);
       hashes.push_back(xxHash64(window));
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 7033481d6014b5..4f55af29b33140 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -109,14 +109,53 @@ CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD);
 
 #undef FOR_EACH_CU_FIELD
 
-// LLD's internal representation of a compact unwind entry.
-struct CompactUnwindEntry {
-  uint64_t functionAddress;
-  uint32_t functionLength;
-  compact_unwind_encoding_t encoding;
-  Symbol *personality;
-  InputSection *lsda;
-};
+void lld::macho::CompactUnwindEntry::relocateOneCompactUnwindEntry(
+    const Defined *d) {
+  ConcatInputSection *unwindEntry = d->unwindEntry();
+  assert(unwindEntry);
+
+  functionAddress = d->getVA();
+  // If we have DWARF unwind info, create a slimmed-down CU entry that points
+  // to it.
+  if (unwindEntry->getName() == section_names::ehFrame) {
+    // The unwinder will look for the DWARF entry starting at the hint,
+    // assuming the hint points to a valid CFI record start. If it
+    // fails to find the record, it proceeds in a linear search through the
+    // contiguous CFI records from the hint until the end of the section.
+    // Ideally, in the case where the offset is too large to be encoded, we
+    // would instead encode the largest possible offset to a valid CFI record,
+    // but since we don't keep track of that, just encode zero -- the start of
+    // the section is always the start of a CFI record.
+    uint64_t dwarfOffsetHint =
+        d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
+            ? d->unwindEntry()->outSecOff
+            : 0;
+    encoding = target->modeDwarfEncoding | dwarfOffsetHint;
+    const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
+    functionLength = fde.funcLength;
+    // Omit the DWARF personality from compact-unwind entry so that we
+    // don't need to encode it.
+    personality = nullptr;
+    lsda = fde.lsda;
+    return;
+  }
+
+  assert(unwindEntry->getName() == section_names::compactUnwind);
+
+  CompactUnwindLayout cuLayout(target->wordSize);
+  auto buf = reinterpret_cast<const uint8_t *>(unwindEntry->data.data()) -
+             target->wordSize;
+  functionLength =
+      support::endian::read32le(buf + cuLayout.functionLengthOffset);
+  encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
+  for (const Reloc &r : unwindEntry->relocs) {
+    if (r.offset == cuLayout.personalityOffset)
+      personality = r.referent.get<Symbol *>();
+    else if (r.offset == cuLayout.lsdaOffset)
+      lsda = r.getReferentInputSection();
+  }
+  return;
+}
 
 using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
 
@@ -355,45 +394,7 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
     if (!d->unwindEntry())
       return;
 
-    // If we have DWARF unwind info, create a slimmed-down CU entry that points
-    // to it.
-    if (d->unwindEntry()->getName() == section_names::ehFrame) {
-      // The unwinder will look for the DWARF entry starting at the hint,
-      // assuming the hint points to a valid CFI record start. If it
-      // fails to find the record, it proceeds in a linear search through the
-      // contiguous CFI records from the hint until the end of the section.
-      // Ideally, in the case where the offset is too large to be encoded, we
-      // would instead encode the largest possible offset to a valid CFI record,
-      // but since we don't keep track of that, just encode zero -- the start of
-      // the section is always the start of a CFI record.
-      uint64_t dwarfOffsetHint =
-          d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
-              ? d->unwindEntry()->outSecOff
-              : 0;
-      cu.encoding = target->modeDwarfEncoding | dwarfOffsetHint;
-      const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
-      cu.functionLength = fde.funcLength;
-      // Omit the DWARF personality from compact-unwind entry so that we
-      // don't need to encode it.
-      cu.personality = nullptr;
-      cu.lsda = fde.lsda;
-      return;
-    }
-
-    assert(d->unwindEntry()->getName() == section_names::compactUnwind);
-
-    auto buf =
-        reinterpret_cast<const uint8_t *>(d->unwindEntry()->data.data()) -
-        target->wordSize;
-    cu.functionLength =
-        support::endian::read32le(buf + cuLayout.functionLengthOffset);
-    cu.encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
-    for (const Reloc &r : d->unwindEntry()->relocs) {
-      if (r.offset == cuLayout.personalityOffset)
-        cu.personality = r.referent.get<Symbol *>();
-      else if (r.offset == cuLayout.lsdaOffset)
-        cu.lsda = r.getReferentInputSection();
-    }
+    cu.relocateOneCompactUnwindEntry(d);
   });
 }
 
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 826573b0c44a00..1cdfa3b3d02753 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -34,6 +34,17 @@ class UnwindInfoSection : public SyntheticSection {
 
 UnwindInfoSection *makeUnwindInfoSection();
 
+// LLD's internal representation of a compact unwind entry.
+struct CompactUnwindEntry {
+  uint64_t functionAddress;
+  uint32_t functionLength;
+  compact_unwind_encoding_t encoding;
+  Symbol *personality;
+  InputSection *lsda;
+
+  void relocateOneCompactUnwindEntry(const Defined *d);
+};
+
 } // namespace lld::macho
 
 #endif

``````````

</details>


https://github.com/llvm/llvm-project/pull/105587


More information about the llvm-commits mailing list