[lld] [lld-macho] Omit `__llvm_addrsig` metadata from the output (PR #98913)
Daniel Bertalan via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 15:30:22 PDT 2024
https://github.com/BertalanD updated https://github.com/llvm/llvm-project/pull/98913
>From 70aa4c28588e5b6354ef954de30dec9036096297 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Mon, 15 Jul 2024 15:37:41 +0200
Subject: [PATCH 1/2] [lld-macho] Omit `__llvm_addrsig` metadata from the
output
This section contains metadata that's only relevant for Identical Code
Folding at link time, we should not include it in the output.
We still treat it like a regular section during input file parsing (e.g.
create a `ConcatInputSection` for it), as we want its relocations to be
parsed. But it should not be passed to `addInputSection`, as that's what
assigns it to an `OutputSection` and adds it to the `inputSections`
vector which specifies the inputs to dead-stripping and relocation
scanning.
This fixes a "__DATA,__llvm_addrsig, offset 0: fixups overlap" error
when using `--icf=safe` alongside `-fixup_chains`. This occurs because
all `__llvm_addrsig` sections are 8 bytes large, and the relocations
which signify functions whose addresses are taken are all at offset 0.
This makes the fix in 5fa24ac2 ("Category Merger: add support for
addrsig references") obsolete, as we no longer try to resolve symbols
referenced in `__llvm_addrsig` when writing the output file. When we do
iterate its relocations in `markAddrSigSymbols`, we do not try to
resolve their addresses.
---
lld/MachO/Driver.cpp | 3 +++
lld/MachO/ObjC.cpp | 32 --------------------------------
lld/test/MachO/dead-strip.s | 24 ++++++++++++++++++++++++
lld/test/MachO/icf-safe.ll | 11 +++++++----
4 files changed, 34 insertions(+), 36 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 28c28f29defd1..a370d5734124a 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1247,6 +1247,9 @@ static void gatherInputSections() {
// contrast, EH frames are handled like regular ConcatInputSections.)
if (section->name == section_names::compactUnwind)
continue;
+ // Addrsig sections contain metadata only needed at link time.
+ if (section->name == section_names::addrSig)
+ continue;
for (const Subsection &subsection : section->subsections)
addInputSection(subsection.isec);
}
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 740ebaf7e0403..4a6f99654ba13 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -449,7 +449,6 @@ class ObjcCategoryMerger {
mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
void eraseISec(ConcatInputSection *isec);
- void removeRefsToErasedIsecs();
void eraseMergedCategories();
void generateCatListForNonErasedCategories(
@@ -519,8 +518,6 @@ class ObjcCategoryMerger {
std::vector<ConcatInputSection *> &allInputSections;
// Map of base class Symbol to list of InfoInputCategory's for it
MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
- // Set for tracking InputSection erased via eraseISec
- DenseSet<InputSection *> erasedIsecs;
// Normally, the binary data comes from the input files, but since we're
// generating binary data ourselves, we use the below array to store it in.
@@ -1272,8 +1269,6 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
}
void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
- erasedIsecs.insert(isec);
-
isec->live = false;
for (auto &sym : isec->symbols)
sym->used = false;
@@ -1326,33 +1321,6 @@ void ObjcCategoryMerger::eraseMergedCategories() {
catLayout.instancePropsOffset);
}
}
-
- removeRefsToErasedIsecs();
-}
-
-// The compiler may generate references to categories inside the addrsig
-// section. This function will erase these references.
-void ObjcCategoryMerger::removeRefsToErasedIsecs() {
- for (InputSection *isec : inputSections) {
- if (isec->getName() != section_names::addrSig)
- continue;
-
- auto removeRelocs = [this](Reloc &r) {
- auto *isec = dyn_cast_or_null<ConcatInputSection>(
- r.referent.dyn_cast<InputSection *>());
- if (!isec) {
- Defined *sym =
- dyn_cast_or_null<Defined>(r.referent.dyn_cast<Symbol *>());
- if (sym)
- isec = dyn_cast<ConcatInputSection>(sym->isec());
- }
- if (!isec)
- return false;
- return erasedIsecs.count(isec) > 0;
- };
-
- llvm::erase_if(isec->relocs, removeRelocs);
- }
}
void ObjcCategoryMerger::doMerge() {
diff --git a/lld/test/MachO/dead-strip.s b/lld/test/MachO/dead-strip.s
index f593b69843ba6..d107dad53a3c5 100644
--- a/lld/test/MachO/dead-strip.s
+++ b/lld/test/MachO/dead-strip.s
@@ -329,6 +329,17 @@
# LIT-NEXT: Contents of (__TEXT,__literals) section
# LIT-NEXT: ef be ad de {{$}}
+## Ensure that addrsig metadata does not keep unreferenced functions alive.
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/addrsig.s -o %t/addrsig.o
+# RUN: %lld -lSystem -dead_strip --icf=safe %t/addrsig.o -o %t/addrsig
+# RUN: llvm-objdump --syms %t/addrsig | \
+# RUN: FileCheck --check-prefix=ADDSIG --implicit-check-not _addrsig %s
+# ADDSIG-LABEL: SYMBOL TABLE:
+# ADDSIG-NEXT: g F __TEXT,__text _main
+# ADDSIG-NEXT: g F __TEXT,__text __mh_execute_header
+# ADDSIG-NEXT: *UND* dyld_stub_binder
+
## Duplicate symbols that will be dead stripped later should not fail when using
## the --dead-stripped-duplicates flag
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
@@ -988,3 +999,16 @@ _more_data:
_main:
callq _ref_undef_fun
.subsections_via_symbols
+
+#--- addrsig.s
+.globl _main, _addrsig
+_main:
+ retq
+
+_addrsig:
+ retq
+
+.subsections_via_symbols
+
+.addrsig
+.addrsig_sym _addrsig
diff --git a/lld/test/MachO/icf-safe.ll b/lld/test/MachO/icf-safe.ll
index 71c6f9f7ddac8..7012de56e6938 100644
--- a/lld/test/MachO/icf-safe.ll
+++ b/lld/test/MachO/icf-safe.ll
@@ -5,14 +5,14 @@
; RUN: llc -filetype=obj %s -O3 -o %t/icf-obj.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
; RUN: %lld -arch arm64 -lSystem --icf=safe -dylib -o %t/icf-safe.dylib %t/icf-obj.o
; RUN: %lld -arch arm64 -lSystem --icf=all -dylib -o %t/icf-all.dylib %t/icf-obj.o
-; RUN: llvm-objdump %t/icf-safe.dylib -d --macho | FileCheck %s --check-prefix=ICFSAFE
-; RUN: llvm-objdump %t/icf-all.dylib -d --macho | FileCheck %s --check-prefix=ICFALL
+; RUN: llvm-objdump %t/icf-safe.dylib -d -h --macho | FileCheck %s --check-prefixes=ICFSAFE,CHECK
+; RUN: llvm-objdump %t/icf-all.dylib -d -h --macho | FileCheck %s --check-prefixes=ICFALL,CHECK
; RUN: llvm-as %s -o %t/icf-bitcode.o
; RUN: %lld -arch arm64 -lSystem --icf=safe -dylib -o %t/icf-safe-bitcode.dylib %t/icf-bitcode.o
; RUN: %lld -arch arm64 -lSystem --icf=all -dylib -o %t/icf-all-bitcode.dylib %t/icf-bitcode.o
-; RUN: llvm-objdump %t/icf-safe-bitcode.dylib -d --macho | FileCheck %s --check-prefix=ICFSAFE
-; RUN: llvm-objdump %t/icf-all-bitcode.dylib -d --macho | FileCheck %s --check-prefix=ICFALL
+; RUN: llvm-objdump %t/icf-safe-bitcode.dylib -d -h --macho | FileCheck %s --check-prefixes=ICFSAFE,CHECK
+; RUN: llvm-objdump %t/icf-all-bitcode.dylib -d -h --macho | FileCheck %s --check-prefixes=ICFALL,CHECK
; ICFSAFE-LABEL: _callAllFunctions
; ICFSAFE: bl _func02
@@ -24,6 +24,9 @@
; ICFALL-NEXT: bl _func03_takeaddr
; ICFALL-NEXT: bl _func03_takeaddr
+; CHECK-LABEL: Sections:
+; CHECK-NOT: __llvm_addrsig
+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-macos11.0"
>From 3e48cfa45822d469ac57e40383e5e0f6cdbd021c Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Wed, 17 Jul 2024 00:29:52 +0200
Subject: [PATCH 2/2] add more direct regression test
---
lld/test/MachO/icf-safe.ll | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/lld/test/MachO/icf-safe.ll b/lld/test/MachO/icf-safe.ll
index 7012de56e6938..03830a30048a6 100644
--- a/lld/test/MachO/icf-safe.ll
+++ b/lld/test/MachO/icf-safe.ll
@@ -14,6 +14,11 @@
; RUN: llvm-objdump %t/icf-safe-bitcode.dylib -d -h --macho | FileCheck %s --check-prefixes=ICFSAFE,CHECK
; RUN: llvm-objdump %t/icf-all-bitcode.dylib -d -h --macho | FileCheck %s --check-prefixes=ICFALL,CHECK
+;; Regression test: if we tried writing __llvm_addrsig to the output, -fixup_chains would fail with a "fixups overlap"
+;; error, as the relocations (which reference the address-taken functions) are all at offset 0.
+; RUN: %lld -arch arm64 -lSystem --icf=safe -fixup_chains -dylib -o %t/icf-safe-chained.dylib %t/icf-obj.o
+; RUN: llvm-objdump %t/icf-safe-chained.dylib -d -h --macho | FileCheck %s --check-prefixes=ICFSAFE,CHECK
+
; ICFSAFE-LABEL: _callAllFunctions
; ICFSAFE: bl _func02
; ICFSAFE-NEXT: bl _func02
More information about the llvm-commits
mailing list