[lld] 82dcf30 - [lld-macho] Use fewer indirections in UnwindInfo implementation

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 8 20:49:17 PDT 2022


Author: Jez Ng
Date: 2022-04-08T23:49:07-04:00
New Revision: 82dcf3063697eba96b825e91f11a8e74c4c95cb1

URL: https://github.com/llvm/llvm-project/commit/82dcf3063697eba96b825e91f11a8e74c4c95cb1
DIFF: https://github.com/llvm/llvm-project/commit/82dcf3063697eba96b825e91f11a8e74c4c95cb1.diff

LOG: [lld-macho] Use fewer indirections in UnwindInfo implementation

The previous implementation of UnwindInfoSection materialized
all the compact unwind entries & applied their relocations, then parsed
the resulting data to generate the final unwind info. This design had
some unfortunate conseqeuences: since relocations can only be applied
after their referents have had addresses assigned, operations that need
to happen before address assignment must contort themselves. (See
{D113582} and observe how this diff greatly simplifies it.)

Moreover, it made synthesizing new compact unwind entries awkward.
Handling PR50956 will require us to do this synthesis, and is the main
motivation behind this diff.

Previously, instead of generating a new CompactUnwindEntry directly, we
would have had to generate a ConcatInputSection with a number of
`Reloc`s that would then get "flattened" into a CompactUnwindEntry.

This diff introduces an internal representation of `CompactUnwindEntry`
(the former `CompactUnwindEntry` has been renamed to
`CompactUnwindLayout`). The new CompactUnwindEntry stores references to
its personality symbol and LSDA section directly, without the use of
`Reloc` structs.

In addition to being easier to work with, this diff also allows us to
handle unwind info whose personality symbols are located in sections
placed after the `__unwind_info`.

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D123276

Added: 
    

Modified: 
    lld/MachO/InputFiles.cpp
    lld/MachO/UnwindInfoSection.cpp
    lld/test/MachO/compact-unwind.s
    lld/test/MachO/invalid/compact-unwind-bad-reloc.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 26fa566b63c37..8423fb8bb9beb 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -1013,6 +1013,11 @@ void ObjFile::registerCompactUnwind() {
         referentIsec =
             cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
       }
+      // Unwind info lives in __DATA, and finalization of __TEXT will occur
+      // before finalization of __DATA. Moreover, the finalization of unwind
+      // info depends on the exact addresses that it references. So it is safe
+      // for compact unwind to reference addresses in __TEXT, but not addresses
+      // in any other segment.
       if (referentIsec->getSegName() != segment_names::text)
         error(isec->getLocation(r.offset) + " references section " +
               referentIsec->getName() + " which is not in segment __TEXT");

diff  --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index b9dc7c13a0288..ace4bfe2ee09d 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -95,7 +95,7 @@ using namespace lld::macho;
 // TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
 // TODO(gkm): how do we align the 2nd-level pages?
 
-template <class Ptr> struct CompactUnwindEntry {
+template <class Ptr> struct CompactUnwindLayout {
   Ptr functionAddress;
   uint32_t functionLength;
   compact_unwind_encoding_t encoding;
@@ -103,6 +103,14 @@ template <class Ptr> struct CompactUnwindEntry {
   Ptr lsda;
 };
 
+struct CompactUnwindEntry {
+  uint64_t functionAddress;
+  uint32_t functionLength;
+  compact_unwind_encoding_t encoding;
+  Symbol *personality;
+  InputSection *lsda;
+};
+
 using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
 
 struct SecondLevelPage {
@@ -118,8 +126,7 @@ template <class Ptr>
 class UnwindInfoSectionImpl final : public UnwindInfoSection {
 public:
   void prepareRelocations(ConcatInputSection *) override;
-  void relocateCompactUnwind(std::vector<CompactUnwindEntry<Ptr>> &);
-  Reloc *findLsdaReloc(ConcatInputSection *) const;
+  void relocateCompactUnwind(std::vector<CompactUnwindEntry> &);
   void encodePersonalities();
   void finalize() override;
   void writeTo(uint8_t *buf) const override;
@@ -129,11 +136,10 @@ class UnwindInfoSectionImpl final : public UnwindInfoSection {
   EncodingMap commonEncodingIndexes;
   // The entries here will be in the same order as their originating symbols
   // in symbolsVec.
-  std::vector<CompactUnwindEntry<Ptr>> cuEntries;
+  std::vector<CompactUnwindEntry> cuEntries;
   // Indices into the cuEntries vector.
   std::vector<size_t> cuIndices;
-  // Indices of personality functions within the GOT.
-  std::vector<Ptr> personalities;
+  std::vector<Symbol *> personalities;
   SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
       personalityTable;
   // Indices into cuEntries for CUEs with a non-null LSDA.
@@ -206,8 +212,7 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
     // compact unwind entries that references them, and thus appear as section
     // relocs. There is no need to prepare them. We only prepare relocs for
     // personality functions.
-    if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
-        offsetof(CompactUnwindEntry<Ptr>, personality))
+    if (r.offset != offsetof(CompactUnwindLayout<Ptr>, personality))
       continue;
 
     if (auto *s = r.referent.dyn_cast<Symbol *>()) {
@@ -275,55 +280,35 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
   }
 }
 
-// Unwind info lives in __DATA, and finalization of __TEXT will occur before
-// finalization of __DATA. Moreover, the finalization of unwind info depends on
-// the exact addresses that it references. So it is safe for compact unwind to
-// reference addresses in __TEXT, but not addresses in any other segment.
-static ConcatInputSection *
-checkTextSegment(InputSection *isec, InputSection *cuIsec, uint64_t off) {
-  if (isec->getSegName() != segment_names::text)
-    error(cuIsec->getLocation(off) + " references section " + isec->getName() +
-          " which is not in segment __TEXT");
-  // __TEXT should always contain ConcatInputSections.
-  return cast<ConcatInputSection>(isec);
-}
-
 // We need to apply the relocations to the pre-link compact unwind section
 // before converting it to post-link form. There should only be absolute
 // relocations here: since we are not emitting the pre-link CU section, there
 // is no source address to make a relative location meaningful.
 template <class Ptr>
 void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
-    std::vector<CompactUnwindEntry<Ptr>> &cuEntries) {
+    std::vector<CompactUnwindEntry> &cuEntries) {
   parallelForEachN(0, symbolsVec.size(), [&](size_t i) {
-    uint8_t *buf = reinterpret_cast<uint8_t *>(cuEntries.data()) +
-                   i * sizeof(CompactUnwindEntry<Ptr>);
+    CompactUnwindEntry &cu = cuEntries[i];
     const Defined *d = symbolsVec[i].second;
-    // Write the functionAddress.
-    writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2);
+    cu.functionAddress = d->getVA();
     if (!d->unwindEntry)
       return;
 
-    // Write the rest of the CUE.
-    memcpy(buf + sizeof(Ptr), d->unwindEntry->data.data(),
-           d->unwindEntry->data.size());
+    auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) -
+               sizeof(Ptr);
+    cu.functionLength = support::endian::read32le(
+        buf + offsetof(CompactUnwindLayout<Ptr>, functionLength));
+    cu.encoding = support::endian::read32le(
+        buf + offsetof(CompactUnwindLayout<Ptr>, encoding));
     for (const Reloc &r : d->unwindEntry->relocs) {
-      uint64_t referentVA = 0;
-      if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
-        if (!isa<Undefined>(referentSym)) {
-          if (auto *defined = dyn_cast<Defined>(referentSym))
-            checkTextSegment(defined->isec, d->unwindEntry, r.offset);
-          // At this point in the link, we may not yet know the final address of
-          // the GOT, so we just encode the index. We make it a 1-based index so
-          // that we can distinguish the null pointer case.
-          referentVA = referentSym->gotIndex + 1;
-        }
-      } else {
-        auto *referentIsec = r.referent.get<InputSection *>();
-        checkTextSegment(referentIsec, d->unwindEntry, r.offset);
-        referentVA = referentIsec->getVA(r.addend);
+      if (r.offset == offsetof(CompactUnwindLayout<Ptr>, personality)) {
+        cu.personality = r.referent.get<Symbol *>();
+      } else if (r.offset == offsetof(CompactUnwindLayout<Ptr>, lsda)) {
+        if (auto *referentSym = r.referent.dyn_cast<Symbol *>())
+          cu.lsda = cast<Defined>(referentSym)->isec;
+        else
+          cu.lsda = r.referent.get<InputSection *>();
       }
-      writeAddress(buf + r.offset, referentVA, r.length);
     }
   });
 }
@@ -332,8 +317,8 @@ void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
 // encode them as 2-bit indices into a small array.
 template <class Ptr> void UnwindInfoSectionImpl<Ptr>::encodePersonalities() {
   for (size_t idx : cuIndices) {
-    CompactUnwindEntry<Ptr> &cu = cuEntries[idx];
-    if (cu.personality == 0)
+    CompactUnwindEntry &cu = cuEntries[idx];
+    if (cu.personality == nullptr)
       continue;
     // Linear search is fast enough for a small array.
     auto it = find(personalities, cu.personality);
@@ -349,7 +334,7 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::encodePersonalities() {
             static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK));
   }
   if (personalities.size() > 3)
-    error("too many personalities (" + std::to_string(personalities.size()) +
+    error("too many personalities (" + Twine(personalities.size()) +
           ") for compact unwind to encode");
 }
 
@@ -378,20 +363,6 @@ static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
   return true;
 }
 
-template <class Ptr>
-Reloc *
-UnwindInfoSectionImpl<Ptr>::findLsdaReloc(ConcatInputSection *isec) const {
-  if (isec == nullptr)
-    return nullptr;
-  auto it = llvm::find_if(isec->relocs, [](const Reloc &r) {
-    return r.offset % sizeof(CompactUnwindEntry<Ptr>) ==
-           offsetof(CompactUnwindEntry<Ptr>, lsda);
-  });
-  if (it == isec->relocs.end())
-    return nullptr;
-  return &*it;
-}
-
 // Scan the __LD,__compact_unwind entries and compute the space needs of
 // __TEXT,__unwind_info and __TEXT,__eh_frame.
 template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
@@ -434,25 +405,9 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
            cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding &&
            cuEntries[*foldBegin].personality ==
                cuEntries[*foldEnd].personality &&
-           canFoldEncoding(cuEntries[*foldEnd].encoding)) {
-      // In most cases, we can just compare the values of cuEntries[*].lsda.
-      // However, it is possible for -rename_section to cause the LSDA section
-      // (__gcc_except_tab) to be finalized after the unwind info section. In
-      // that case, we don't yet have unique addresses for the LSDA entries.
-      // So we check their relocations instead.
-      // FIXME: should we account for an LSDA at an absolute address? ld64 seems
-      // to support it, but it seems unlikely to be used in practice.
-      Reloc *lsda1 = findLsdaReloc(symbolsVec[*foldBegin].second->unwindEntry);
-      Reloc *lsda2 = findLsdaReloc(symbolsVec[*foldEnd].second->unwindEntry);
-      if (lsda1 == nullptr && lsda2 == nullptr)
-        continue;
-      if (lsda1 == nullptr || lsda2 == nullptr)
-        break;
-      if (lsda1->referent != lsda2->referent)
-        break;
-      if (lsda1->addend != lsda2->addend)
-        break;
-    }
+           cuEntries[*foldBegin].lsda == cuEntries[*foldEnd].lsda &&
+           canFoldEncoding(cuEntries[*foldEnd].encoding))
+      ;
     *foldWrite++ = *foldBegin;
     foldBegin = foldEnd;
   }
@@ -510,7 +465,7 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
             sizeof(uint32_t);
     while (wordsRemaining >= 1 && i < cuIndices.size()) {
       idx = cuIndices[i];
-      const CompactUnwindEntry<Ptr> *cuPtr = &cuEntries[idx];
+      const CompactUnwindEntry *cuPtr = &cuEntries[idx];
       if (cuPtr->functionAddress >= functionAddressMax) {
         break;
       } else if (commonEncodingIndexes.count(cuPtr->encoding) ||
@@ -545,8 +500,7 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
 
   for (size_t idx : cuIndices) {
     lsdaIndex[idx] = entriesWithLsda.size();
-    const Defined *d = symbolsVec[idx].second;
-    if (findLsdaReloc(d->unwindEntry))
+    if (cuEntries[idx].lsda)
       entriesWithLsda.push_back(idx);
   }
 
@@ -588,9 +542,8 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
     *i32p++ = encoding.first;
 
   // Personalities
-  for (Ptr personality : personalities)
-    *i32p++ =
-        in.got->addr + (personality - 1) * target->wordSize - in.header->addr;
+  for (const Symbol *personality : personalities)
+    *i32p++ = personality->getGotVA() - in.header->addr;
 
   // Level-1 index
   uint32_t lsdaOffset =
@@ -609,7 +562,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
     l2PagesOffset += SECOND_LEVEL_PAGE_BYTES;
   }
   // Level-1 sentinel
-  const CompactUnwindEntry<Ptr> &cuEnd = cuEntries[cuIndices.back()];
+  const CompactUnwindEntry &cuEnd = cuEntries[cuIndices.back()];
   iep->functionOffset =
       cuEnd.functionAddress - in.header->addr + cuEnd.functionLength;
   iep->secondLevelPagesSectionOffset = 0;
@@ -622,18 +575,8 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
   auto *lep =
       reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
   for (size_t idx : entriesWithLsda) {
-    const CompactUnwindEntry<Ptr> &cu = cuEntries[idx];
-    const Defined *d = symbolsVec[idx].second;
-    if (Reloc *r = findLsdaReloc(d->unwindEntry)) {
-      uint64_t va;
-      if (auto *isec = r->referent.dyn_cast<InputSection *>()) {
-        va = isec->getVA(r->addend);
-      } else {
-        auto *sym = r->referent.get<Symbol *>();
-        va = sym->getVA() + r->addend;
-      }
-      lep->lsdaOffset = va - in.header->addr;
-    }
+    const CompactUnwindEntry &cu = cuEntries[idx];
+    lep->lsdaOffset = cu.lsda->getVA(/*off=*/0) - in.header->addr;
     lep->functionOffset = cu.functionAddress - in.header->addr;
     lep++;
   }
@@ -656,7 +599,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
       p2p->encodingsCount = page.localEncodings.size();
       auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
       for (size_t i = 0; i < page.entryCount; i++) {
-        const CompactUnwindEntry<Ptr> &cue =
+        const CompactUnwindEntry &cue =
             cuEntries[cuIndices[page.entryIndex + i]];
         auto it = commonEncodingIndexes.find(cue.encoding);
         if (it == commonEncodingIndexes.end())
@@ -676,7 +619,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
       p2p->entryCount = page.entryCount;
       auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
       for (size_t i = 0; i < page.entryCount; i++) {
-        const CompactUnwindEntry<Ptr> &cue =
+        const CompactUnwindEntry &cue =
             cuEntries[cuIndices[page.entryIndex + i]];
         *ep++ = cue.functionAddress;
         *ep++ = cue.encoding;

diff  --git a/lld/test/MachO/compact-unwind.s b/lld/test/MachO/compact-unwind.s
index 289bba2a2294f..25af4dd7eae1d 100644
--- a/lld/test/MachO/compact-unwind.s
+++ b/lld/test/MachO/compact-unwind.s
@@ -81,6 +81,7 @@
 .globl _my_personality, _exception0
 .text
 .p2align 2
+.no_dead_strip _foo
 _foo:
   .cfi_startproc
 ## This will generate a section relocation.
@@ -91,6 +92,7 @@ _foo:
   .cfi_endproc
 
 .p2align 2
+.no_dead_strip _bar
 _bar:
   .cfi_startproc
 ## Check that we dedup references to the same statically-linked personality.
@@ -100,7 +102,11 @@ _bar:
   ret
   .cfi_endproc
 
+.data
 .p2align 2
+## We put this personality in `__data` to test if we correctly handle
+## personality symbols whose output addresses occur after that of the
+## `__unwind_info` section.
 _my_personality:
   ret
 
@@ -108,6 +114,8 @@ _my_personality:
 _exception0:
   .space 1
 
+.subsections_via_symbols
+
 #--- main.s
 .globl _main, _quux, _my_personality, _exception1
 

diff  --git a/lld/test/MachO/invalid/compact-unwind-bad-reloc.s b/lld/test/MachO/invalid/compact-unwind-bad-reloc.s
index dca668c9487e6..61c7a42968375 100644
--- a/lld/test/MachO/invalid/compact-unwind-bad-reloc.s
+++ b/lld/test/MachO/invalid/compact-unwind-bad-reloc.s
@@ -1,10 +1,9 @@
 # REQUIRES: x86
 # RUN: rm -rf %t; split-file %s %t
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/bad-function.s -o %t/bad-function.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/bad-personality.s -o %t/bad-personality.o
-# RUN: not %lld -lSystem -lc++ %t/bad-function.o -o %t 2>&1 | FileCheck %s -DFILE=%t/bad-function.o -D#OFF=0x0
-# RUN: not %lld -lSystem -lc++ %t/bad-personality.o -o %t 2>&1 | FileCheck %s -DFILE=%t/bad-personality.o -D#OFF=0x10
-# CHECK: error: [[FILE]]:(__compact_unwind+0x[[#%x,OFF]]) references section __data which is not in segment __TEXT
+# RUN: not %lld -lSystem -dylib -lc++ %t/bad-function.o -o /dev/null 2>&1 | FileCheck %s
+# CHECK: error: {{.*}}bad-function.o:(__compact_unwind+0x0) references section __data which is not in segment __TEXT
+# CHECK: error: {{.*}}bad-function.o:(__compact_unwind+0x20) references section __data which is not in segment __TEXT
 
 #--- bad-function.s
 .data
@@ -15,16 +14,11 @@ _not_a_function:
   retq
   .cfi_endproc
 
-#--- bad-personality.s
-.globl _main, _not_a_function
-.text
-_main:
+_not_a_function_2:
   .cfi_startproc
-  .cfi_personality 155, _my_personality
+  .cfi_personality 155, ___gxx_personality_v0
   .cfi_def_cfa_offset 16
   retq
   .cfi_endproc
 
-.data
-.globl _my_personality
-_my_personality:
+.subsections_via_symbols


        


More information about the llvm-commits mailing list