[lld] 7f60ed1 - [reland][lld-macho] Private label aliases to weak symbols should not retain section data
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 23 11:52:32 PST 2022
Author: Jez Ng
Date: 2022-12-23T14:51:18-05:00
New Revision: 7f60ed12effade437028b938ea463f37020f89fd
URL: https://github.com/llvm/llvm-project/commit/7f60ed12effade437028b938ea463f37020f89fd
DIFF: https://github.com/llvm/llvm-project/commit/7f60ed12effade437028b938ea463f37020f89fd.diff
LOG: [reland][lld-macho] Private label aliases to weak symbols should not retain section data
This reverts commit a650f2ec7a37cf1f495108bbb313e948c232c29c.
The crashes it was causing will be fixed by the stacked diff {D140606}.
Added:
lld/test/MachO/weak-def-alias-private-label.s
Modified:
lld/MachO/InputFiles.cpp
lld/MachO/UnwindInfoSection.cpp
Removed:
################################################################################
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 93e5ee6378708..c903c98cc015c 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -628,6 +628,12 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
}
}
+// Symbols with `l` or `L` as a prefix are linker-private and never appear in
+// the output.
+static bool isPrivateLabel(StringRef name) {
+ return name.startswith("l") || name.startswith("L");
+}
+
template <class NList>
static macho::Symbol *createDefined(const NList &sym, StringRef name,
InputSection *isec, uint64_t value,
@@ -697,8 +703,7 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
}
assert(!isWeakDefCanBeHidden &&
"weak_def_can_be_hidden on already-hidden symbol?");
- bool includeInSymtab =
- !name.startswith("l") && !name.startswith("L") && !isEhFrameSection(isec);
+ bool includeInSymtab = !isPrivateLabel(name) && !isEhFrameSection(isec);
return make<Defined>(
name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
/*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab,
@@ -831,17 +836,25 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
}
sections[i]->doneSplitting = true;
+ auto getSymName = [strtab](const NList& sym) -> StringRef {
+ return StringRef(strtab + sym.n_strx);
+ };
+
// Calculate symbol sizes and create subsections by splitting the sections
// along symbol boundaries.
// We populate subsections by repeatedly splitting the last (highest
// address) subsection.
llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
+ // Put private-label symbols after other symbols at the same address.
+ if (nList[lhs].n_value == nList[rhs].n_value)
+ return !isPrivateLabel(getSymName(nList[lhs])) &&
+ isPrivateLabel(getSymName(nList[rhs]));
return nList[lhs].n_value < nList[rhs].n_value;
});
for (size_t j = 0; j < symbolIndices.size(); ++j) {
const uint32_t symIndex = symbolIndices[j];
const NList &sym = nList[symIndex];
- StringRef name = strtab + sym.n_strx;
+ StringRef name = getSymName(sym);
Subsection &subsec = subsections.back();
InputSection *isec = subsec.isec;
@@ -861,8 +874,15 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
if (!subsectionsViaSymbols || symbolOffset == 0 ||
sym.n_desc & N_ALT_ENTRY || !isa<ConcatInputSection>(isec)) {
isec->hasAltEntry = symbolOffset != 0;
- symbols[symIndex] = createDefined(sym, name, isec, symbolOffset,
- symbolSize, forceHidden);
+ // If we have an private-label symbol that's an alias, just reuse the
+ // aliased symbol. Our sorting step above ensures that any such symbols
+ // will appear after the non-private-label ones. See
+ // weak-def-alias-ignored.s for the motivation behind this.
+ if (symbolOffset == 0 && isPrivateLabel(name) && j != 0)
+ symbols[symIndex] = symbols[symbolIndices[j - 1]];
+ else
+ symbols[symIndex] = createDefined(sym, name, isec, symbolOffset,
+ symbolSize, forceHidden);
continue;
}
auto *concatIsec = cast<ConcatInputSection>(isec);
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 8ad2aeb9f2158..470f33523ddb0 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -209,7 +209,7 @@ void UnwindInfoSection::addSymbol(const Defined *d) {
// If we have multiple symbols at the same address, only one of them can have
// an associated unwind entry.
if (!p.second && d->unwindEntry) {
- assert(!p.first->second->unwindEntry);
+ assert(p.first->second == d || !p.first->second->unwindEntry);
p.first->second = d;
}
}
diff --git a/lld/test/MachO/weak-def-alias-private-label.s b/lld/test/MachO/weak-def-alias-private-label.s
new file mode 100644
index 0000000000000..32067b77867b1
--- /dev/null
+++ b/lld/test/MachO/weak-def-alias-private-label.s
@@ -0,0 +1,75 @@
+# REQUIRES: x86
+## This test checks that when we coalesce weak definitions, any private-label
+## aliases to those weak defs don't cause the coalesced data to be retained.
+## This test explicitly creates those private-label symbols, but it was actually
+## motivated by MC's aarch64 backend which automatically creates them when
+## emitting object files. I've chosen to explicitly create them here since we
+## can then reference those symbols for a more complete test.
+##
+## Not retaining the data matters for more than just size -- we have a use case
+## that depends on proper data coalescing to emit a valid file format.
+##
+## ld64 actually treats all local symbol aliases (not just the private ones) the
+## same way. But implementing this is harder -- we would have to create those
+## symbols first (so we can emit their names later), but we would have to
+## ensure the linker correctly shuffles them around when their aliasees get
+## coalesced. Emulating the behavior of weak binds for non-private symbols would
+## be even trickier. Let's just deal with private-label symbols for now until we
+## find a use case for more general local symbols.
+##
+## Finally, ld64 does all this regardless of whether .subsections_via_symbols is
+## specified. We don't. But again, given how rare the lack of that directive is
+## (I've only seen it from hand-written assembly inputs), I don't think we need
+## to worry about it.
+
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-then-private.s -o %t/weak-then-private.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/private-then-weak.s -o %t/private-then-weak.o
+# RUN: %lld -dylib %t/weak-then-private.o %t/private-then-weak.o -o %t/test1
+# RUN: %lld -dylib %t/private-then-weak.o %t/weak-then-private.o -o %t/test2
+# RUN: llvm-objdump --macho --syms --section="__DATA,__data" --weak-bind %t/test1 | FileCheck %s
+# RUN: llvm-objdump --macho --syms --section="__DATA,__data" --weak-bind %t/test2 | FileCheck %s
+
+## Check that we only have one copy of 0x123 in the data, not two.
+# CHECK: Contents of (__DATA,__data) section
+# CHECK-NEXT: 0000000000001000 23 01 00 00 00 00 00 00 00 10 00 00 00 00 00 00 {{$}}
+# CHECK-NEXT: 0000000000001010 00 10 00 00 00 00 00 00 {{$}}
+# CHECK-EMPTY:
+# CHECK-NEXT: SYMBOL TABLE:
+# CHECK-NEXT: 0000000000001008 l O __DATA,__data _ref
+# CHECK-NEXT: 0000000000001010 l O __DATA,__data _ref
+# CHECK-NEXT: 0000000000001000 w O __DATA,__data _weak
+# CHECK-NEXT: 0000000000000000 *UND* dyld_stub_binder
+# CHECK-EMPTY:
+## Even though the references were to the non-weak `l_ignored` aliases, we
+## should still emit weak binds as if they were the `_weak` symbol itself.
+# CHECK-NEXT: Weak bind table:
+# CHECK-NEXT: segment section address type addend symbol
+# CHECK-NEXT: __DATA __data 0x00001008 pointer 0 _weak
+# CHECK-NEXT: __DATA __data 0x00001010 pointer 0 _weak
+
+#--- weak-then-private.s
+.globl _weak
+.weak_definition _weak
+.data
+_weak:
+l_ignored:
+ .quad 0x123
+
+_ref:
+ .quad l_ignored
+
+.subsections_via_symbols
+
+#--- private-then-weak.s
+.globl _weak
+.weak_definition _weak
+.data
+l_ignored:
+_weak:
+ .quad 0x123
+
+_ref:
+ .quad l_ignored
+
+.subsections_via_symbols
More information about the llvm-commits
mailing list