[lld] 525bfa1 - [lld-macho] Emit personalities in compact unwind

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 8 10:48:14 PST 2021


Author: Jez Ng
Date: 2021-02-08T13:47:59-05:00
New Revision: 525bfa10ec1d4e3dfa3932a299cd67ffe59a5827

URL: https://github.com/llvm/llvm-project/commit/525bfa10ec1d4e3dfa3932a299cd67ffe59a5827
DIFF: https://github.com/llvm/llvm-project/commit/525bfa10ec1d4e3dfa3932a299cd67ffe59a5827.diff

LOG: [lld-macho] Emit personalities in compact unwind

Note that there is a triple indirection involved with
personalities and compact unwind:

1. Two bits of each CU encoding are used as an offset into the
   personality array.
2. Each entry of the personality array is an offset from the image base.
   The resulting address (after adding the image base) should point within the
   GOT.
3. The corresponding GOT entry contains the actual pointer to the
   personality function.

To further complicate things, when the personality function is in the
object file (as opposed to a dylib), its references in
`__compact_unwind` may refer to it via a section + offset relocation
instead of a symbol relocation. Since our GOT implementation can only
create entries for symbols, we have to create a synthetic symbol at the
given section offset.

Reviewed By: clayborg

Differential Revision: https://reviews.llvm.org/D95809

Added: 
    lld/test/MachO/compact-unwind-generated.test
    lld/test/MachO/compact-unwind.s
    lld/test/MachO/invalid/compact-unwind-bad-reloc.s
    lld/test/MachO/invalid/compact-unwind-personalities.s

Modified: 
    lld/MachO/SyntheticSections.cpp
    lld/MachO/UnwindInfoSection.cpp
    lld/MachO/UnwindInfoSection.h
    lld/MachO/Writer.cpp
    lld/test/MachO/tools/validate-unwind-info.py

Removed: 
    lld/test/MachO/compact-unwind-pie.s
    lld/test/MachO/compact-unwind.test


################################################################################
diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 561c845adcea..78e4e2785886 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -825,18 +825,23 @@ void IndirectSymtabSection::finalizeContents() {
   in.stubs->reserved1 = in.lazyPointers->reserved1 = off;
 }
 
+static uint32_t indirectValue(const Symbol *sym) {
+  return sym->symtabIndex != UINT32_MAX ? sym->symtabIndex
+                                        : MachO::INDIRECT_SYMBOL_LOCAL;
+}
+
 void IndirectSymtabSection::writeTo(uint8_t *buf) const {
   uint32_t off = 0;
   for (const Symbol *sym : in.got->getEntries()) {
-    write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
+    write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
     ++off;
   }
   for (const Symbol *sym : in.tlvPointers->getEntries()) {
-    write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
+    write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
     ++off;
   }
   for (const Symbol *sym : in.stubs->getEntries()) {
-    write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
+    write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
     ++off;
   }
 }

diff  --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 30e50b3194ce..7df8a8b62713 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -12,11 +12,13 @@
 #include "MergedOutputSection.h"
 #include "OutputSection.h"
 #include "OutputSegment.h"
+#include "SymbolTable.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
 
 #include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/BinaryFormat/MachO.h"
 
@@ -81,6 +83,10 @@ using namespace lld::macho;
 // all sizes. Therefore, we don't even bother implementing the regular
 // non-compressed format. Time will tell if anyone in the field ever
 // overflows the 127-encodings limit.
+//
+// Refer to the definition of unwind_info_section_header in
+// compact_unwind_encoding.h for an overview of the format we are encoding
+// here.
 
 // TODO(gkm): prune __eh_frame entries superseded by __unwind_info
 // TODO(gkm): how do we align the 2nd-level pages?
@@ -94,9 +100,115 @@ bool UnwindInfoSection::isNeeded() const {
   return (compactUnwindSection != nullptr);
 }
 
+// Compact unwind relocations have different semantics, so we handle them in a
+// separate code path from regular relocations. First, we do not wish to add
+// rebase opcodes for __LD,__compact_unwind, because that section doesn't
+// actually end up in the final binary. Second, personality pointers always
+// reside in the GOT and must be treated specially.
+void macho::prepareCompactUnwind(InputSection *isec) {
+  assert(isec->segname == segment_names::ld &&
+         isec->name == section_names::compactUnwind);
+
+  DenseMap<std::pair<InputSection *, uint64_t /* addend */>, macho::Symbol *>
+      anonPersonalitySymbols;
+  for (Reloc &r : isec->relocs) {
+    // TODO: generalize for other archs
+    assert(r.type == X86_64_RELOC_UNSIGNED);
+    if (r.offset % sizeof(CompactUnwindEntry64) !=
+        offsetof(struct CompactUnwindEntry64, personality))
+      continue;
+
+    if (auto *s = r.referent.dyn_cast<lld::macho::Symbol *>()) {
+      if (auto *undefined = dyn_cast<Undefined>(s))
+        treatUndefinedSymbol(*undefined);
+      else
+        in.got->addEntry(s);
+    } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
+      // Personality functions can be referenced via section relocations
+      // if they live in an object file (instead of a dylib). Create
+      // placeholder synthetic symbols for them in the GOT.
+      macho::Symbol *&s = anonPersonalitySymbols[{referentIsec, r.addend}];
+      if (s == nullptr) {
+        s = make<Defined>("<internal>", nullptr, referentIsec, r.addend, false,
+                          false, false);
+        in.got->addEntry(s);
+      }
+      r.referent = s;
+      r.addend = 0;
+    }
+  }
+}
+
+// Unwind info lives in __DATA, and finalization of __TEXT will occur before
+// finalization of __DATA. Moreover, the finalization of unwind info depends on
+// the exact addresses that it references. So it is safe for compact unwind to
+// reference addresses in __TEXT, but not addresses in any other segment.
+static void checkTextSegment(InputSection *isec) {
+  if (isec->segname != segment_names::text)
+    error("compact unwind references address in " + toString(isec) +
+          " which is not in segment __TEXT");
+}
+
+// We need to apply the relocations to the pre-link compact unwind section
+// before converting it to post-link form. There should only be absolute
+// relocations here: since we are not emitting the pre-link CU section, there
+// is no source address to make a relative location meaningful.
+static void relocateCompactUnwind(MergedOutputSection *compactUnwindSection,
+                                  std::vector<CompactUnwindEntry64> &cuVector) {
+  for (InputSection *isec : compactUnwindSection->inputs) {
+    uint8_t *buf =
+        reinterpret_cast<uint8_t *>(cuVector.data()) + isec->outSecFileOff;
+    memcpy(buf, isec->data.data(), isec->data.size());
+
+    for (Reloc &r : isec->relocs) {
+      uint64_t referentVA = 0;
+      if (auto *referentSym = r.referent.dyn_cast<macho::Symbol *>()) {
+        if (!isa<Undefined>(referentSym)) {
+          assert(referentSym->isInGot());
+          if (auto *defined = dyn_cast<Defined>(referentSym))
+            checkTextSegment(defined->isec);
+          // At this point in the link, we may not yet know the final address of
+          // the GOT, so we just encode the index. We make it a 1-based index so
+          // that we can distinguish the null pointer case.
+          referentVA = referentSym->gotIndex + 1;
+        }
+      } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
+        checkTextSegment(referentIsec);
+        referentVA = referentIsec->getVA() + r.addend;
+      }
+      support::endian::write64le(buf + r.offset, referentVA);
+    }
+  }
+}
+
+// There should only be a handful of unique personality pointers, so we can
+// encode them as 2-bit indices into a small array.
+void encodePersonalities(const std::vector<CompactUnwindEntry64 *> &cuPtrVector,
+                         std::vector<uint32_t> &personalities) {
+  for (CompactUnwindEntry64 *cu : cuPtrVector) {
+    if (cu->personality == 0)
+      continue;
+    uint32_t personalityOffset = cu->personality - in.header->addr;
+    // Linear search is fast enough for a small array.
+    auto it = find(personalities, personalityOffset);
+    uint32_t personalityIndex; // 1-based index
+    if (it != personalities.end()) {
+      personalityIndex = std::distance(personalities.begin(), it) + 1;
+    } else {
+      personalities.push_back(cu->personality);
+      personalityIndex = personalities.size();
+    }
+    cu->encoding |=
+        personalityIndex << countTrailingZeros(
+            static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK));
+  }
+  if (personalities.size() > 3)
+    error("too many personalities (" + std::to_string(personalities.size()) +
+          ") for compact unwind to encode");
+}
+
 // Scan the __LD,__compact_unwind entries and compute the space needs of
 // __TEXT,__unwind_info and __TEXT,__eh_frame
-
 void UnwindInfoSection::finalize() {
   if (compactUnwindSection == nullptr)
     return;
@@ -114,12 +226,12 @@ void UnwindInfoSection::finalize() {
       compactUnwindSection->getSize() / sizeof(CompactUnwindEntry64);
   cuVector.resize(cuCount);
   // Relocate all __LD,__compact_unwind entries
-  compactUnwindSection->writeTo(reinterpret_cast<uint8_t *>(cuVector.data()));
+  relocateCompactUnwind(compactUnwindSection, cuVector);
 
   // Rather than sort & fold the 32-byte entries directly, we create a
   // vector of pointers to entries and sort & fold that instead.
   cuPtrVector.reserve(cuCount);
-  for (const CompactUnwindEntry64 &cuEntry : cuVector)
+  for (CompactUnwindEntry64 &cuEntry : cuVector)
     cuPtrVector.emplace_back(&cuEntry);
   std::sort(cuPtrVector.begin(), cuPtrVector.end(),
             [](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
@@ -146,6 +258,8 @@ void UnwindInfoSection::finalize() {
   }
   cuPtrVector.erase(foldWrite, cuPtrVector.end());
 
+  encodePersonalities(cuPtrVector, personalities);
+
   // Count frequencies of the folded encodings
   EncodingMap encodingFrequencies;
   for (auto cuPtrEntry : cuPtrVector)
@@ -263,7 +377,7 @@ void UnwindInfoSection::writeTo(uint8_t *buf) const {
 
   // Personalities
   for (const uint32_t &personality : personalities)
-    *i32p++ = personality;
+    *i32p++ = in.got->addr + (personality - 1) * WordSize;
 
   // Level-1 index
   uint32_t lsdaOffset =

diff  --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 2285cf930d83..ebc5c5ca1cc4 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -63,16 +63,19 @@ class UnwindInfoSection : public SyntheticSection {
 private:
   std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
   EncodingMap commonEncodingIndexes;
+  // Indices of personality functions within the GOT.
   std::vector<uint32_t> personalities;
   std::vector<unwind_info_section_header_lsda_index_entry> lsdaEntries;
   std::vector<CompactUnwindEntry64> cuVector;
-  std::vector<const CompactUnwindEntry64 *> cuPtrVector;
+  std::vector<CompactUnwindEntry64 *> cuPtrVector;
   std::vector<SecondLevelPage> secondLevelPages;
   MergedOutputSection *compactUnwindSection = nullptr;
   uint64_t level2PagesOffset = 0;
   uint64_t unwindInfoSize = 0;
 };
 
+void prepareCompactUnwind(InputSection *isec);
+
 } // namespace macho
 } // namespace lld
 

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 46667e194288..a52b13abeb05 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -427,11 +427,10 @@ static void prepareSymbolRelocation(lld::macho::Symbol *sym,
 
 void Writer::scanRelocations() {
   for (InputSection *isec : inputSections) {
-    // We do not wish to add rebase opcodes for __LD,__compact_unwind, because
-    // it doesn't actually end up in the final binary. TODO: filtering it out
-    // before Writer runs might be cleaner...
-    if (isec->segname == segment_names::ld)
+    if (isec->segname == segment_names::ld) {
+      prepareCompactUnwind(isec);
       continue;
+    }
 
     for (Reloc &r : isec->relocs) {
       if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND))

diff  --git a/lld/test/MachO/compact-unwind.test b/lld/test/MachO/compact-unwind-generated.test
similarity index 100%
rename from lld/test/MachO/compact-unwind.test
rename to lld/test/MachO/compact-unwind-generated.test

diff  --git a/lld/test/MachO/compact-unwind-pie.s b/lld/test/MachO/compact-unwind-pie.s
deleted file mode 100644
index f67cf71134d1..000000000000
--- a/lld/test/MachO/compact-unwind-pie.s
+++ /dev/null
@@ -1,21 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %s -o %t.o
-# RUN: %lld -pie -lSystem %t.o -o %t
-# RUN: llvm-objdump --macho --unwind-info --rebase %t | FileCheck %s
-
-## Check that we do not add rebase opcodes to the compact unwind section.
-# CHECK:      Contents of __unwind_info section:
-# CHECK-NEXT:   Version:                                   0x1
-# CHECK-NEXT:   Common encodings array section offset:
-# CHECK-NEXT:   Number of common encodings in array:       0x1
-# CHECK:      Rebase table:
-# CHECK-NEXT: segment  section            address     type
-# CHECK-EMPTY:
-
-.globl _main
-.text
-_main:
-  .cfi_startproc
-  .cfi_def_cfa_offset 16
-  retq
-  .cfi_endproc

diff  --git a/lld/test/MachO/compact-unwind.s b/lld/test/MachO/compact-unwind.s
new file mode 100644
index 000000000000..cd9c07afcc7e
--- /dev/null
+++ b/lld/test/MachO/compact-unwind.s
@@ -0,0 +1,49 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %s -o %t.o
+# RUN: %lld -pie -lSystem -lc++ %t.o -o %t
+# RUN: llvm-objdump --macho --unwind-info --indirect-symbols --rebase %t | FileCheck %s
+
+# CHECK:      Indirect symbols for (__DATA_CONST,__got)
+# CHECK-NEXT: address                    index name
+# CHECK-DAG:  0x[[#%x,GXX_PERSONALITY:]] [[#]] ___gxx_personality_v0
+# CHECK-DAG:  0x[[#%x,MY_PERSONALITY:]]  LOCAL
+
+# CHECK:      Contents of __unwind_info section:
+# CHECK:        Personality functions: (count = 2)
+# CHECK-NEXT:     personality[1]: 0x{{0*}}[[#MY_PERSONALITY-0x100000000]]
+# CHECK-NEXT:     personality[2]: 0x{{0*}}[[#GXX_PERSONALITY-0x100000000]]
+
+## Check that we do not add rebase opcodes to the compact unwind section.
+# CHECK:      Rebase table:
+# CHECK-NEXT: segment      section        address          type
+# CHECK-NEXT: __DATA_CONST __got          0x{{[0-9a-f]*}}  pointer
+# CHECK-NEXT: __DATA_CONST __got          0x{{[0-9a-f]*}}  pointer
+# CHECK-EMPTY:
+
+.globl _main, _foo, _my_personality, _bar
+
+.text
+_foo:
+  .cfi_startproc
+  .cfi_personality 155, _my_personality
+  .cfi_def_cfa_offset 16
+  retq
+  .cfi_endproc
+
+_bar:
+  .cfi_startproc
+## Check that we dedup references to the same statically-linked personality.
+  .cfi_personality 155, _my_personality
+  .cfi_def_cfa_offset 16
+  retq
+  .cfi_endproc
+
+_main:
+  .cfi_startproc
+  .cfi_personality 155, ___gxx_personality_v0
+  .cfi_def_cfa_offset 16
+  retq
+  .cfi_endproc
+
+_my_personality:
+  retq

diff  --git a/lld/test/MachO/invalid/compact-unwind-bad-reloc.s b/lld/test/MachO/invalid/compact-unwind-bad-reloc.s
new file mode 100644
index 000000000000..9204cafcfc81
--- /dev/null
+++ b/lld/test/MachO/invalid/compact-unwind-bad-reloc.s
@@ -0,0 +1,17 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %s -o %t.o
+# RUN: not %lld -pie -lSystem -lc++ %t.o -o %t 2>&1 | FileCheck %s -DFILE=%t.o
+# CHECK: error: compact unwind references address in [[FILE]]:(__data) which is not in segment __TEXT
+
+.globl _main, _not_a_function
+.text
+_main:
+  retq
+
+.data
+_not_a_function:
+  .cfi_startproc
+  .cfi_personality 155, ___gxx_personality_v0
+  .cfi_def_cfa_offset 16
+  retq
+  .cfi_endproc

diff  --git a/lld/test/MachO/invalid/compact-unwind-personalities.s b/lld/test/MachO/invalid/compact-unwind-personalities.s
new file mode 100644
index 000000000000..744a4edfcb98
--- /dev/null
+++ b/lld/test/MachO/invalid/compact-unwind-personalities.s
@@ -0,0 +1,45 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %s -o %t.o
+# RUN: not %lld -pie -lSystem -lc++ %t.o -o %t 2>&1 | FileCheck %s --check-prefix=TOO-MANY
+# RUN: not %lld -pie -lSystem %t.o -o %t 2>&1 | FileCheck %s --check-prefix=UNDEF
+# TOO-MANY: error: too many personalities (4) for compact unwind to encode
+# UNDEF: error: undefined symbol: ___gxx_personality_v0
+
+.globl _main, _personality_1, _personality_2, _personality_3
+
+.text
+
+_foo:
+  .cfi_startproc
+  .cfi_personality 155, _personality_1
+  .cfi_def_cfa_offset 16
+  retq
+  .cfi_endproc
+
+_bar:
+  .cfi_startproc
+  .cfi_personality 155, _personality_2
+  .cfi_def_cfa_offset 16
+  retq
+  .cfi_endproc
+
+_baz:
+  .cfi_startproc
+  .cfi_personality 155, _personality_3
+  .cfi_def_cfa_offset 16
+  retq
+  .cfi_endproc
+
+_main:
+  .cfi_startproc
+  .cfi_personality 155, ___gxx_personality_v0
+  .cfi_def_cfa_offset 16
+  retq
+  .cfi_endproc
+
+_personality_1:
+  retq
+_personality_2:
+  retq
+_personality_3:
+  retq

diff  --git a/lld/test/MachO/tools/validate-unwind-info.py b/lld/test/MachO/tools/validate-unwind-info.py
index f3619b4baac6..9f1b726124d9 100755
--- a/lld/test/MachO/tools/validate-unwind-info.py
+++ b/lld/test/MachO/tools/validate-unwind-info.py
@@ -73,8 +73,10 @@ def main():
 
   if program_encodings_map != object_encodings_map:
     if args.debug:
-      pprint("program encodings map:\n" + str(program_encodings_map))
-      pprint("object encodings map:\n" + str(object_encodings_map))
+      print("program encodings map:")
+      pprint(program_encodings_map)
+      print("object encodings map:")
+      pprint(object_encodings_map)
     sys.exit("encoding maps differ")
 
   # Count frequency of object-file folded encodings


        


More information about the llvm-commits mailing list