[lld] 4a5e111 - [lld-macho] Better deduplication of personality pointers

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 23 19:02:55 PST 2021


Author: Jez Ng
Date: 2021-02-23T22:02:38-05:00
New Revision: 4a5e111aea7ac78190211a2549f8d0d53ee2f01d

URL: https://github.com/llvm/llvm-project/commit/4a5e111aea7ac78190211a2549f8d0d53ee2f01d
DIFF: https://github.com/llvm/llvm-project/commit/4a5e111aea7ac78190211a2549f8d0d53ee2f01d.diff

LOG: [lld-macho] Better deduplication of personality pointers

{D95809} introduced a mechanism for synthetic symbol creation of personality
pointers. When multiple section relocations referred to the same personality
pointer, it would deduplicate them. However, it neglected to consider that we
could have symbol relocations that also refer to the same personality pointer.
This diff fixes it.

In practice, this mix of relocations arises when there is a statically-linked
personality routine that is referenced from multiple object files. Within the
same object file, it will be referred to via section relocations, but
(obviously) other object files will refer to it via symbol relocations. Failing
to deduplicate these references resulted in us going over the
3-personality-pointer limit when linking some larger applications.

Fixes llvm.org/PR48389.

Reviewed By: #lld-macho, thakis, alexshap

Differential Revision: https://reviews.llvm.org/D97245

Added: 
    

Modified: 
    lld/MachO/UnwindInfoSection.cpp
    lld/test/MachO/compact-unwind.s

Removed: 
    


################################################################################
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 507cca0fb601..10304327ed2f 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -100,6 +100,9 @@ bool UnwindInfoSection::isNeeded() const {
   return (compactUnwindSection != nullptr);
 }
 
+SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, macho::Symbol *>
+    personalityTable;
+
 // Compact unwind relocations have different semantics, so we handle them in a
 // separate code path from regular relocations. First, we do not wish to add
 // rebase opcodes for __LD,__compact_unwind, because that section doesn't
@@ -109,25 +112,39 @@ void macho::prepareCompactUnwind(InputSection *isec) {
   assert(isec->segname == segment_names::ld &&
          isec->name == section_names::compactUnwind);
 
-  DenseMap<std::pair<InputSection *, uint64_t /* addend */>, macho::Symbol *>
-      anonPersonalitySymbols;
   for (Reloc &r : isec->relocs) {
-    // TODO: generalize for other archs
-    assert(r.type == X86_64_RELOC_UNSIGNED);
+    assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
     if (r.offset % sizeof(CompactUnwindEntry64) !=
         offsetof(struct CompactUnwindEntry64, personality))
       continue;
 
     if (auto *s = r.referent.dyn_cast<lld::macho::Symbol *>()) {
-      if (auto *undefined = dyn_cast<Undefined>(s))
+      if (auto *undefined = dyn_cast<Undefined>(s)) {
         treatUndefinedSymbol(*undefined);
-      else
-        in.got->addEntry(s);
-    } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
+        continue;
+      }
+      if (auto *defined = dyn_cast<Defined>(s)) {
+        // Check if we have created a synthetic symbol at the same address.
+        macho::Symbol *&personality =
+            personalityTable[{defined->isec, defined->value}];
+        if (personality == nullptr) {
+          personality = defined;
+          in.got->addEntry(defined);
+        } else if (personality != defined) {
+          r.referent = personality;
+        }
+        continue;
+      }
+      assert(isa<DylibSymbol>(s));
+      in.got->addEntry(s);
+      continue;
+    }
+
+    if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
       // Personality functions can be referenced via section relocations
-      // if they live in an object file (instead of a dylib). Create
-      // placeholder synthetic symbols for them in the GOT.
-      macho::Symbol *&s = anonPersonalitySymbols[{referentIsec, r.addend}];
+      // if they live in the same object file. Create placeholder synthetic
+      // symbols for them in the GOT.
+      macho::Symbol *&s = personalityTable[{referentIsec, r.addend}];
       if (s == nullptr) {
         s = make<Defined>("<internal>", nullptr, referentIsec, r.addend, false,
                           false, false);
@@ -225,7 +242,6 @@ void UnwindInfoSection::finalize() {
   size_t cuCount =
       compactUnwindSection->getSize() / sizeof(CompactUnwindEntry64);
   cuVector.resize(cuCount);
-  // Relocate all __LD,__compact_unwind entries
   relocateCompactUnwind(compactUnwindSection, cuVector);
 
   // Rather than sort & fold the 32-byte entries directly, we create a

diff --git a/lld/test/MachO/compact-unwind.s b/lld/test/MachO/compact-unwind.s
index 6cddda9adfc9..3d853f8e9aa6 100644
--- a/lld/test/MachO/compact-unwind.s
+++ b/lld/test/MachO/compact-unwind.s
@@ -1,39 +1,48 @@
 # REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %s -o %t.o
-# RUN: %lld -pie -lSystem -lc++ %t.o -o %t
-# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t | FileCheck %s
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/my_personality.s -o %t/my_personality.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/main.s -o %t/main.o
+# RUN: %lld -pie -lSystem -lc++ %t/my_personality.o %t/main.o -o %t/personality-first
+# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/personality-first | FileCheck %s --check-prefixes=FIRST,CHECK
+# RUN: %lld -pie -lSystem -lc++ %t/main.o %t/my_personality.o -o %t/personality-second
+# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/personality-second | FileCheck %s --check-prefixes=SECOND,CHECK
 
-# CHECK:      Indirect symbols for (__DATA_CONST,__got)
-# CHECK-NEXT: address                    index name
-# CHECK-DAG:  0x[[#%x,GXX_PERSONALITY:]] [[#]] ___gxx_personality_v0
-# CHECK-DAG:  0x[[#%x,MY_PERSONALITY:]]  LOCAL
+# FIRST:      Indirect symbols for (__DATA_CONST,__got)
+# FIRST-NEXT: address                    index name
+# FIRST-DAG:  0x[[#%x,GXX_PERSONALITY:]] [[#]] ___gxx_personality_v0
+# FIRST-DAG:  0x[[#%x,MY_PERSONALITY:]]  LOCAL
+
+# SECOND:      Indirect symbols for (__DATA_CONST,__got)
+# SECOND-NEXT: address                    index name
+# SECOND-DAG:  0x[[#%x,GXX_PERSONALITY:]] [[#]] ___gxx_personality_v0
+# SECOND-DAG:  0x[[#%x,MY_PERSONALITY:]]  [[#]] _my_personality
 
 # CHECK:      SYMBOL TABLE:
 # CHECK-DAG:  [[#%x,MAIN:]]       g  F __TEXT,__text _main
-# CHECK-DAG:  [[#%x,FOO:]]        g  F __TEXT,__text _foo
+# CHECK-DAG:  [[#%x,FOO:]]        l  F __TEXT,__text _foo
 # CHECK-DAG:  [[#%x,EXCEPTION0:]] g  O __TEXT,__gcc_except_tab _exception0
 # CHECK-DAG:  [[#%x,EXCEPTION1:]] g  O __TEXT,__gcc_except_tab _exception1
 
 # CHECK:      Contents of __unwind_info section:
 # CHECK:        Personality functions: (count = 2)
-# CHECK-NEXT:     personality[1]: 0x{{0*}}[[#MY_PERSONALITY-0x100000000]]
-# CHECK-NEXT:     personality[2]: 0x{{0*}}[[#GXX_PERSONALITY-0x100000000]]
+# CHECK-DAG:     personality[{{[0-9]+}}]: 0x{{0*}}[[#MY_PERSONALITY-0x100000000]]
+# CHECK-DAG:     personality[{{[0-9]+}}]: 0x{{0*}}[[#GXX_PERSONALITY-0x100000000]]
 # CHECK:        LSDA descriptors:
-# CHECK-NEXT:     [0]: function offset=0x{{0*}}[[#FOO-0x100000000]],  LSDA offset=0x{{0*}}[[#EXCEPTION0-0x100000000]]
-# CHECK-NEXT:     [1]: function offset=0x{{0*}}[[#MAIN-0x100000000]], LSDA offset=0x{{0*}}[[#EXCEPTION1-0x100000000]]
+# CHECK-DAG:     function offset=0x{{0*}}[[#FOO-0x100000000]],  LSDA offset=0x{{0*}}[[#EXCEPTION0-0x100000000]]
+# CHECK-DAG:     function offset=0x{{0*}}[[#MAIN-0x100000000]], LSDA offset=0x{{0*}}[[#EXCEPTION1-0x100000000]]
 
 ## Check that we do not add rebase opcodes to the compact unwind section.
 # CHECK:      Rebase table:
 # CHECK-NEXT: segment      section        address          type
 # CHECK-NEXT: __DATA_CONST __got          0x{{[0-9a-f]*}}  pointer
-# CHECK-NEXT: __DATA_CONST __got          0x{{[0-9a-f]*}}  pointer
-# CHECK-EMPTY:
-
-.globl _main, _foo, _my_personality, _bar, _exception0, _exception1
+# CHECK-NOT:  __TEXT
 
+#--- my_personality.s
+.globl _my_personality, _exception0
 .text
 _foo:
   .cfi_startproc
+## This will generate a section relocation.
   .cfi_personality 155, _my_personality
   .cfi_lsda 16, _exception0
   .cfi_def_cfa_offset 16
@@ -49,6 +58,17 @@ _bar:
   retq
   .cfi_endproc
 
+_my_personality:
+  retq
+
+.section __TEXT,__gcc_except_tab
+_exception0:
+  .space 1
+
+#--- main.s
+.globl _main, _my_personality, _exception1
+
+.text
 _main:
   .cfi_startproc
   .cfi_personality 155, ___gxx_personality_v0
@@ -57,11 +77,17 @@ _main:
   retq
   .cfi_endproc
 
-_my_personality:
+_baz:
+  .cfi_startproc
+## This will generate a symbol relocation. Check that we reuse the personality
+## referenced by the section relocation in my_personality.s.
+  .cfi_personality 155, _my_personality
+  .cfi_lsda 16, _exception1
+  .cfi_def_cfa_offset 16
   retq
+  .cfi_endproc
+
 
 .section __TEXT,__gcc_except_tab
-_exception0:
-  .space 1
 _exception1:
   .space 1


        


More information about the llvm-commits mailing list