[lld] [lld-macho] Remove symbols to `__mod_init_func` with `-init_offsets` (PR #97156)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 29 01:09:35 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld-macho

Author: Daniel Bertalan (BertalanD)

<details>
<summary>Changes</summary>

When `-fixup_chains`/`-init_offsets` is used, a different section, `__init_offsets` is synthesized from `__mod_init_func`. If there are any symbols defined inside `__mod_init_func`, they are added to the symbol table unconditionally while processing the input files. Later, when querying these symbols' addresses (when constructing the symtab or exports trie), we crash with a null deref, as there is no output section assigned to them.

Just making the symbols point to `__init_offsets` is a bad idea, as the new section stores 32-bit integers instead of 64-bit pointers; accessing the symbols would not do what the programmer intended. We should entirely omit them from the output. This is what ld64 and ld-prime do.

This patch uses the same mechanism as dead-stripping to mark these symbols as not needed in the output. There might be nicer fixes than the workaround, this is discussed in #<!-- -->97155.

Fixes https://github.com/llvm/llvm-project/pull/79894#issuecomment-1944092892
Fixes #<!-- -->94716

---
Full diff: https://github.com/llvm/llvm-project/pull/97156.diff


4 Files Affected:

- (modified) lld/MachO/Driver.cpp (+11) 
- (modified) lld/MachO/Writer.cpp (+11-1) 
- (modified) lld/test/MachO/init-offsets.s (+5-1) 
- (added) lld/test/MachO/invalid/init-offsets.s (+16) 


``````````diff
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 9dddabcf3680c..83c92d214de31 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1393,6 +1393,12 @@ static void handleExplicitExports() {
   }
 }
 
+static void eraseInitializerSymbols() {
+  for (ConcatInputSection *isec : in.initOffsets->inputs())
+    for (Defined *sym : isec->symbols)
+      sym->used = false;
+}
+
 namespace lld {
 namespace macho {
 bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
@@ -1971,6 +1977,11 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
     if (config->deadStrip)
       markLive();
 
+    // Ensure that no symbols point inside __mod_init_func sections if they are
+    // removed due to -init_offsets. This must run after dead stripping.
+    if (config->emitInitOffsets)
+      eraseInitializerSymbols();
+
     // Categories are not subject to dead-strip. The __objc_catlist section is
     // marked as NO_DEAD_STRIP and that propagates into all category data.
     if (args.hasArg(OPT_check_category_conflicts))
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index b9fcb45ef86b2..e6b80c1d42d9e 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -640,7 +640,17 @@ void Writer::treatSpecialUndefineds() {
 
 static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
                                     const lld::macho::Reloc &r) {
-  assert(sym->isLive());
+  if (!sym->isLive()) {
+    if (Defined *defined = dyn_cast<Defined>(sym)) {
+      if (config->emitInitOffsets &&
+          defined->isec()->getName() == section_names::moduleInitFunc)
+        fatal(isec->getLocation(r.offset) + ": cannot reference " +
+              sym->getName() +
+              " defined in __mod_init_func when -init_offsets is used");
+    }
+    assert(false && "referenced symbol must be live");
+  }
+
   const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
 
   if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
diff --git a/lld/test/MachO/init-offsets.s b/lld/test/MachO/init-offsets.s
index 844951a1dc380..cf34a9b46f308 100644
--- a/lld/test/MachO/init-offsets.s
+++ b/lld/test/MachO/init-offsets.s
@@ -12,7 +12,7 @@
 # RUN: llvm-objcopy --dump-section=__TEXT,__init_offsets=%t/section.bin %t/out
 # RUN: echo "__TEXT,__init_offsets contents:" >> %t/dump.txt
 # RUN: od -An -txI %t/section.bin >> %t/dump.txt
-# RUN: FileCheck --check-prefix=CONTENT %s < %t/dump.txt
+# RUN: FileCheck --check-prefix=CONTENT --implicit-check-not=_init_ptr %s < %t/dump.txt
 
 ## This test checks that:
 ## - __mod_init_func is replaced by __init_offsets.
@@ -21,6 +21,7 @@
 ##   command line, and in the order they show up within __mod_init_func.
 ## - for undefined and dylib symbols, stubs are created, and the offsets point to those.
 ## - offsets are relative to __TEXT's address, they aren't an absolute virtual address.
+## - symbols defined within __mod_init_func are ignored.
 
 # FLAGS:      sectname __init_offsets
 # FLAGS-NEXT:  segname __TEXT
@@ -48,6 +49,7 @@
 
 #--- first.s
 .globl _first_init, ___isnan, _main
+.globl _init_ptr_1
 .text
 _first_init:
   ret
@@ -55,6 +57,7 @@ _main:
   ret
 
 .section __DATA,__mod_init_func,mod_init_funcs
+_init_ptr_1:
 .quad _first_init
 .quad ___isnan
 
@@ -68,6 +71,7 @@ _second_init:
 
 .section __DATA,__mod_init_func,mod_init_funcs
 .quad _undefined
+_init_ptr_2:
 .quad _second_init
 
 .subsections_via_symbols
diff --git a/lld/test/MachO/invalid/init-offsets.s b/lld/test/MachO/invalid/init-offsets.s
new file mode 100644
index 0000000000000..51a441e0a3e29
--- /dev/null
+++ b/lld/test/MachO/invalid/init-offsets.s
@@ -0,0 +1,16 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: not %lld -lSystem -init_offsets  %t.o -o /dev/null 2>&1 | FileCheck %s
+
+# CHECK: error: {{.*}}init-offsets.s.tmp.o:(symbol _main+0x3): cannot reference _init_slot defined in __mod_init_func when -init_offsets is used
+
+.globl _main
+.text
+_main:
+  leaq _init_slot(%rip), %rax
+
+.section __DATA,__mod_init_func,mod_init_funcs
+_init_slot:
+  .quad _main
+

``````````

</details>


https://github.com/llvm/llvm-project/pull/97156


More information about the llvm-commits mailing list