[llvm] a564551 - [lld/mac] Implement -dead_strip
Nico Weber via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 2 08:09:42 PDT 2021
Author: Nico Weber
Date: 2021-06-02T11:09:26-04:00
New Revision: a5645513dba702216672bc31333e9c173b3a56c5
URL: https://github.com/llvm/llvm-project/commit/a5645513dba702216672bc31333e9c173b3a56c5
DIFF: https://github.com/llvm/llvm-project/commit/a5645513dba702216672bc31333e9c173b3a56c5.diff
LOG: [lld/mac] Implement -dead_strip
Also adds support for live_support sections, no_dead_strip sections, and
.no_dead_strip symbols.
Chromium Framework 345MB unstripped -> 250MB stripped
(vs 290MB unstripped -> 236MB stripped with ld64).
Doing dead stripping is a bit faster than not, because so much less
data needs to be processed:
% ministat lld_*
x lld_nostrip.txt
+ lld_strip.txt
N Min Max Median Avg Stddev
x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794
+ 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651
Difference at 95.0% confidence
-0.144711 +/- 0.0336749
-3.60967% +/- 0.839989%
(Student's t, pooled s = 0.0358398)
This interacts with many parts of the linker. I tried to add test coverage
for all added `isLive()` checks, so that some test will fail if any of them
is removed. I checked that the test expectations for the most part match
ld64's behavior (except for live-support-iterations.s, see the comment
in the test). Interacts with:
- debug info
- export tries
- import opcodes
- flags like -exported_symbol(s_list)
- -U / dynamic_lookup
- mod_init_funcs, mod_term_funcs
- weak symbol handling
- unwind info
- stubs
- map files
- -sectcreate
- undefined, dylib, common, defined (both absolute and normal) symbols
It's possible it interacts with more features I didn't think of,
of course.
I also did some manual testing:
- check-llvm check-clang check-lld work with lld with this patch
as host linker and -dead_strip enabled
- Chromium still starts
- Chromium's base_unittests still pass, including unwind tests
Implementation-wise, this is InputSection-based, so it'll work for
object files with .subsections_via_symbols (which includes all
object files generated by clang). I first based this on the COFF
implementation, but later realized that things are more similar to ELF.
I think it'd be good to refactor MarkLive.cpp to look more like the ELF
part at some point, but I'd like to get a working state checked in first.
Mechanical parts:
- Rename canOmitFromOutput to wasCoalesced (no behavior change)
since it really is for weak coalesced symbols
- Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP
(`.no_dead_strip` in asm)
Fixes PR49276.
Differential Revision: https://reviews.llvm.org/D103324
Added:
lld/MachO/MarkLive.cpp
lld/MachO/MarkLive.h
lld/test/MachO/dead-strip.s
Modified:
lld/MachO/CMakeLists.txt
lld/MachO/ConcatOutputSection.cpp
lld/MachO/Config.h
lld/MachO/Driver.cpp
lld/MachO/InputFiles.cpp
lld/MachO/InputSection.h
lld/MachO/MapFile.cpp
lld/MachO/Options.td
lld/MachO/SymbolTable.cpp
lld/MachO/SymbolTable.h
lld/MachO/Symbols.cpp
lld/MachO/Symbols.h
lld/MachO/SyntheticSections.cpp
lld/MachO/SyntheticSections.h
lld/MachO/UnwindInfoSection.cpp
lld/MachO/UnwindInfoSection.h
lld/MachO/Writer.cpp
lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libc++abi.tbd
lld/test/MachO/mh-header-link.s
lld/test/MachO/sectcreate.s
llvm/utils/gn/secondary/lld/MachO/BUILD.gn
Removed:
################################################################################
diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 1c93a8842a283..a805e1789e69d 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -19,6 +19,7 @@ add_lld_library(lldMachO2
InputSection.cpp
LTO.cpp
MapFile.cpp
+ MarkLive.cpp
ObjC.cpp
OutputSection.cpp
OutputSegment.cpp
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index fa47ccf1dc8b2..e9e78f9df5af1 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -299,7 +299,8 @@ void ConcatOutputSection::finalize() {
r.referent = thunkInfo.sym = symtab->addDefined(
thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0,
/*size=*/thunkSize, /*isWeakDef=*/false, /*isPrivateExtern=*/true,
- /*isThumb=*/false, /*isReferencedDynamically=*/false);
+ /*isThumb=*/false, /*isReferencedDynamically=*/false,
+ /*noDeadStrip=*/false);
target->populateThunk(thunkInfo.isec, funcSym);
finalizeOne(thunkInfo.isec);
thunks.push_back(thunkInfo.isec);
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index da263ec3252cd..93e88cfe7eb62 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -111,6 +111,7 @@ struct Configuration {
llvm::StringRef thinLTOJobs;
bool deadStripDylibs = false;
bool demangle = false;
+ bool deadStrip = false;
PlatformInfo platformInfo;
NamespaceKind namespaceKind = NamespaceKind::twolevel;
UndefinedSymbolTreatment undefinedSymbolTreatment =
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index b568609a65e16..7003533b5fdf0 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -10,6 +10,7 @@
#include "Config.h"
#include "InputFiles.h"
#include "LTO.h"
+#include "MarkLive.h"
#include "ObjC.h"
#include "OutputSection.h"
#include "OutputSegment.h"
@@ -541,12 +542,15 @@ static void replaceCommonSymbols() {
isec->flags = S_ZEROFILL;
inputSections.push_back(isec);
+ // FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip
+ // and pass them on here.
replaceSymbol<Defined>(sym, sym->getName(), isec->file, isec, /*value=*/0,
/*size=*/0,
/*isWeakDef=*/false,
/*isExternal=*/true, common->privateExtern,
/*isThumb=*/false,
- /*isReferencedDynamically=*/false);
+ /*isReferencedDynamically=*/false,
+ /*noDeadStrip=*/false);
}
}
@@ -967,6 +971,9 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
depTracker =
make<DependencyTracker>(args.getLastArgValue(OPT_dependency_info));
+ // Must be set before any InputSections and Symbols are created.
+ config->deadStrip = args.hasArg(OPT_dead_strip);
+
config->systemLibraryRoots = getSystemLibraryRoots(args);
if (const char *path = getReproduceOption(args)) {
// Note that --reproduce is a debug option so you can ignore it
@@ -1285,6 +1292,9 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
}
}
+ if (config->deadStrip)
+ markLive();
+
// Write to an output file.
if (target->wordSize == 8)
writeResult<LP64>();
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 6f71d1fb4f91b..15dffab485896 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -488,10 +488,10 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
if (isWeakDefCanBeHidden)
isPrivateExtern = true;
- return symtab->addDefined(name, isec->file, isec, value, size,
- sym.n_desc & N_WEAK_DEF, isPrivateExtern,
- sym.n_desc & N_ARM_THUMB_DEF,
- sym.n_desc & REFERENCED_DYNAMICALLY);
+ return symtab->addDefined(
+ name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF,
+ isPrivateExtern, sym.n_desc & N_ARM_THUMB_DEF,
+ sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
}
assert(!isWeakDefCanBeHidden &&
@@ -499,7 +499,8 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
return make<Defined>(
name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF,
/*isExternal=*/false, /*isPrivateExtern=*/false,
- sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY);
+ sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY,
+ sym.n_desc & N_NO_DEAD_STRIP);
}
// Absolute symbols are defined symbols that do not have an associated
@@ -512,13 +513,15 @@ static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
return symtab->addDefined(name, file, nullptr, sym.n_value, /*size=*/0,
/*isWeakDef=*/false, sym.n_type & N_PEXT,
sym.n_desc & N_ARM_THUMB_DEF,
- /*isReferencedDynamically=*/false);
+ /*isReferencedDynamically=*/false,
+ sym.n_desc & N_NO_DEAD_STRIP);
}
return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
/*isWeakDef=*/false,
/*isExternal=*/false, /*isPrivateExtern=*/false,
sym.n_desc & N_ARM_THUMB_DEF,
- /*isReferencedDynamically=*/false);
+ /*isReferencedDynamically=*/false,
+ sym.n_desc & N_NO_DEAD_STRIP);
}
template <class NList>
@@ -614,7 +617,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
auto *nextIsec = make<InputSection>(*isec);
nextIsec->data = isec->data.slice(symbolOffset);
nextIsec->numRefs = 0;
- nextIsec->canOmitFromOutput = false;
+ nextIsec->wasCoalesced = false;
isec->data = isec->data.slice(0, symbolOffset);
// By construction, the symbol will be at offset zero in the new
@@ -640,6 +643,7 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
isec->segname = segName.take_front(16);
const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
isec->data = {buf, mb.getBufferSize()};
+ isec->live = true;
subsections.push_back({{0, isec}});
}
@@ -1027,7 +1031,8 @@ static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
return symtab->addDefined(name, &file, /*isec=*/nullptr, /*value=*/0,
/*size=*/0, objSym.isWeak(), isPrivateExtern,
/*isThumb=*/false,
- /*isReferencedDynamically=*/false);
+ /*isReferencedDynamically=*/false,
+ /*noDeadStrip=*/false);
}
BitcodeFile::BitcodeFile(MemoryBufferRef mbref)
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 4655e33943949..032e9d08001d2 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -9,6 +9,7 @@
#ifndef LLD_MACHO_INPUT_SECTION_H
#define LLD_MACHO_INPUT_SECTION_H
+#include "Config.h"
#include "Relocations.h"
#include "lld/Common/LLVM.h"
@@ -47,17 +48,17 @@ class InputSection {
// How many symbols refer to this InputSection.
uint32_t numRefs = 0;
- // True if this InputSection could not be written to the output file.
- // With subsections_via_symbols, most symbol have its own InputSection,
+ // With subsections_via_symbols, most symbols have their own InputSection,
// and for weak symbols (e.g. from inline functions), only the
// InputSection from one translation unit will make it to the output,
// while all copies in other translation units are coalesced into the
// first and not copied to the output.
- bool canOmitFromOutput = false;
+ bool wasCoalesced = false;
- bool shouldOmitFromOutput() const {
- return canOmitFromOutput && numRefs == 0;
- }
+ bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; }
+ bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
+
+ bool live = !config->deadStrip;
ArrayRef<uint8_t> data;
std::vector<Reloc> relocs;
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index b52e509285913..37789e3f29486 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -64,11 +64,9 @@ static std::vector<Defined *> getSymbols() {
for (InputFile *file : inputFiles)
if (isa<ObjFile>(file))
for (Symbol *sym : file->symbols) {
- if (sym == nullptr)
- continue;
- if (auto *d = dyn_cast<Defined>(sym))
- if (d->isec && d->getFile() == file) {
- assert(!d->isec->shouldOmitFromOutput() &&
+ if (auto *d = dyn_cast_or_null<Defined>(sym))
+ if (d->isLive() && d->isec && d->getFile() == file) {
+ assert(!d->isec->isCoalescedWeak() &&
"file->symbols should store resolved symbols");
v.push_back(d);
}
diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp
new file mode 100644
index 0000000000000..b425906668974
--- /dev/null
+++ b/lld/MachO/MarkLive.cpp
@@ -0,0 +1,189 @@
+//===- MarkLive.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MarkLive.h"
+#include "Config.h"
+#include "OutputSegment.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "UnwindInfoSection.h"
+#include "mach-o/compact_unwind_encoding.h"
+#include "llvm/Support/TimeProfiler.h"
+
+namespace lld {
+namespace macho {
+
+using namespace llvm;
+using namespace llvm::MachO;
+
+// Set live bit on for each reachable chunk. Unmarked (unreachable)
+// InputSections will be ignored by Writer, so they will be excluded
+// from the final output.
+void markLive() {
+ TimeTraceScope timeScope("markLive");
+
+ // We build up a worklist of sections which have been marked as live. We only
+ // push into the worklist when we discover an unmarked section, and we mark
+ // as we push, so sections never appear twice in the list.
+ SmallVector<InputSection *, 256> worklist;
+
+ auto enqueue = [&](InputSection *s) {
+ if (s->live)
+ return;
+ s->live = true;
+ worklist.push_back(s);
+ };
+
+ auto addSym = [&](Symbol *s) {
+ s->used = true;
+ if (auto *d = dyn_cast<Defined>(s))
+ if (d->isec)
+ enqueue(d->isec);
+ };
+
+ // Add GC roots.
+ if (config->entry)
+ addSym(config->entry);
+ for (Symbol *sym : symtab->getSymbols()) {
+ if (auto *defined = dyn_cast<Defined>(sym)) {
+ // -exported_symbol(s_list)
+ if (!config->exportedSymbols.empty() &&
+ config->exportedSymbols.match(defined->getName())) {
+ // FIXME: Instead of doing this here, maybe the Driver code doing
+ // the matching should add them to explicitUndefineds? Then the
+ // explicitUndefineds code below would handle this automatically.
+ assert(!defined->privateExtern &&
+ "should have been rejected by driver");
+ addSym(defined);
+ continue;
+ }
+
+ // public symbols explicitly marked .no_dead_strip
+ if (defined->referencedDynamically || defined->noDeadStrip) {
+ addSym(defined);
+ continue;
+ }
+
+ // FIXME: When we implement these flags, make symbols from them GC roots:
+ // * -reexported_symbol(s_list)
+ // * -alias(-list)
+ // * -init
+
+ // In dylibs and bundles, all external functions are GC roots.
+ // FIXME: -export_dynamic should enable this for executables too.
+ if (config->outputType != MH_EXECUTE && !defined->privateExtern) {
+ addSym(defined);
+ continue;
+ }
+ }
+ }
+ // -u symbols
+ for (Symbol *sym : config->explicitUndefineds)
+ if (auto *defined = dyn_cast<Defined>(sym))
+ addSym(defined);
+ // local symbols explicitly marked .no_dead_strip
+ for (const InputFile *file : inputFiles)
+ if (auto *objFile = dyn_cast<ObjFile>(file))
+ for (Symbol *sym : objFile->symbols)
+ if (auto *defined = dyn_cast_or_null<Defined>(sym))
+ if (!defined->isExternal() && defined->noDeadStrip)
+ addSym(defined);
+ if (auto *stubBinder =
+ dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder")))
+ addSym(stubBinder);
+ for (InputSection *isec : inputSections) {
+ // Sections marked no_dead_strip
+ if (isec->flags & S_ATTR_NO_DEAD_STRIP) {
+ enqueue(isec);
+ continue;
+ }
+
+ // mod_init_funcs, mod_term_funcs sections
+ if (sectionType(isec->flags) == S_MOD_INIT_FUNC_POINTERS ||
+ sectionType(isec->flags) == S_MOD_TERM_FUNC_POINTERS) {
+ enqueue(isec);
+ continue;
+ }
+
+ // Dead strip runs before UnwindInfoSection handling so we need to keep
+ // __LD,__compact_unwind alive here.
+ // But that section contains absolute references to __TEXT,__text and
+ // keeps most code alive due to that. So we can't just enqueue() the
+ // section: We must skip the relocations for the functionAddress
+ // in each CompactUnwindEntry.
+ // See also scanEhFrameSection() in lld/ELF/MarkLive.cpp.
+ if (isec->segname == segment_names::ld &&
+ isec->name == section_names::compactUnwind) {
+ isec->live = true;
+ const int compactUnwindEntrySize =
+ target->wordSize == 8 ? sizeof(CompactUnwindEntry<uint64_t>)
+ : sizeof(CompactUnwindEntry<uint32_t>);
+ for (const Reloc &r : isec->relocs) {
+ // This is the relocation for the address of the function itself.
+ // Ignore it, else these would keep everything alive.
+ if (r.offset % compactUnwindEntrySize == 0)
+ continue;
+
+ if (auto *s = r.referent.dyn_cast<Symbol *>())
+ addSym(s);
+ else {
+ auto *referentIsec = r.referent.get<InputSection *>();
+ assert(!referentIsec->isCoalescedWeak());
+ enqueue(referentIsec);
+ }
+ }
+ continue;
+ }
+ }
+
+ do {
+ // Mark things reachable from GC roots as live.
+ while (!worklist.empty()) {
+ InputSection *s = worklist.pop_back_val();
+ assert(s->live && "We mark as live when pushing onto the worklist!");
+
+ // Mark all symbols listed in the relocation table for this section.
+ for (const Reloc &r : s->relocs) {
+ if (auto *s = r.referent.dyn_cast<Symbol *>()) {
+ addSym(s);
+ } else {
+ auto *referentIsec = r.referent.get<InputSection *>();
+ assert(!referentIsec->isCoalescedWeak());
+ enqueue(referentIsec);
+ }
+ }
+ }
+
+ // S_ATTR_LIVE_SUPPORT sections are live if they point _to_ a live section.
+ // Process them in a second pass.
+ for (InputSection *isec : inputSections) {
+ // FIXME: Check if copying all S_ATTR_LIVE_SUPPORT sections into a
+ // separate vector and only walking that here is faster.
+ if (!(isec->flags & S_ATTR_LIVE_SUPPORT) || isec->live)
+ continue;
+
+ for (const Reloc &r : isec->relocs) {
+ bool referentLive;
+ if (auto *s = r.referent.dyn_cast<Symbol *>())
+ referentLive = s->isLive();
+ else
+ referentLive = r.referent.get<InputSection *>()->live;
+ if (referentLive)
+ enqueue(isec);
+ }
+ }
+
+ // S_ATTR_LIVE_SUPPORT could have marked additional sections live,
+ // which in turn could mark additional S_ATTR_LIVE_SUPPORT sections live.
+ // Iterate. In practice, the second iteration won't mark additional
+ // S_ATTR_LIVE_SUPPORT sections live.
+ } while (!worklist.empty());
+}
+
+} // namespace macho
+} // namespace lld
diff --git a/lld/MachO/MarkLive.h b/lld/MachO/MarkLive.h
new file mode 100644
index 0000000000000..4db657c5728de
--- /dev/null
+++ b/lld/MachO/MarkLive.h
@@ -0,0 +1,20 @@
+//===- MarkLive.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_MARKLIVE_H
+#define LLD_MACHO_MARKLIVE_H
+
+namespace lld {
+namespace macho {
+
+void markLive();
+
+} // namespace macho
+} // namespace lld
+
+#endif // LLD_MACHO_MARKLIVE_H
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 47c6d9997e98d..31c5a4449969e 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -218,7 +218,6 @@ def grp_opts : OptionGroup<"opts">, HelpText<"OPTIMIZATIONS">;
def dead_strip : Flag<["-"], "dead_strip">,
HelpText<"Remove unreachable functions and data">,
- Flags<[HelpHidden]>,
Group<grp_opts>;
def order_file : Separate<["-"], "order_file">,
MetaVarName<"<file>">,
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 1a9cc4e2c99d4..985188556f83e 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -46,7 +46,8 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
InputSection *isec, uint64_t value,
uint64_t size, bool isWeakDef,
bool isPrivateExtern, bool isThumb,
- bool isReferencedDynamically) {
+ bool isReferencedDynamically,
+ bool noDeadStrip) {
Symbol *s;
bool wasInserted;
bool overridesWeakDef = false;
@@ -63,6 +64,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
// If one of them isn't private extern, the merged symbol isn't.
defined->privateExtern &= isPrivateExtern;
defined->referencedDynamically |= isReferencedDynamically;
+ defined->noDeadStrip |= noDeadStrip;
// FIXME: Handle this for bitcode files.
// FIXME: We currently only do this if both symbols are weak.
@@ -70,7 +72,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
// case where !isWeakDef && defined->isWeakDef() right
// requires some care and testing).
if (isec)
- isec->canOmitFromOutput = true;
+ isec->wasCoalesced = true;
}
return defined;
@@ -89,7 +91,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
Defined *defined = replaceSymbol<Defined>(
s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true,
- isPrivateExtern, isThumb, isReferencedDynamically);
+ isPrivateExtern, isThumb, isReferencedDynamically, noDeadStrip);
defined->overridesWeakDef = overridesWeakDef;
return defined;
}
@@ -188,7 +190,8 @@ Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec,
bool referencedDynamically) {
Defined *s = addDefined(name, nullptr, isec, value, /*size=*/0,
/*isWeakDef=*/false, isPrivateExtern,
- /*isThumb=*/false, referencedDynamically);
+ /*isThumb=*/false, referencedDynamically,
+ /*noDeadStrip=*/false);
s->includeInSymtab = includeInSymtab;
return s;
}
diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index 462c84b7aeea1..17f1ecbd346bd 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -40,7 +40,7 @@ class SymbolTable {
Defined *addDefined(StringRef name, InputFile *, InputSection *,
uint64_t value, uint64_t size, bool isWeakDef,
bool isPrivateExtern, bool isThumb,
- bool isReferencedDynamically);
+ bool isReferencedDynamically, bool noDeadStrip);
Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef);
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 853ec7452e520..1f28cebb80d60 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -31,7 +31,29 @@ uint64_t Symbol::getStubVA() const { return in.stubs->getVA(stubsIndex); }
uint64_t Symbol::getGotVA() const { return in.got->getVA(gotIndex); }
uint64_t Symbol::getTlvVA() const { return in.tlvPointers->getVA(gotIndex); }
+bool Symbol::isLive() const {
+ if (isa<DylibSymbol>(this) || isa<Undefined>(this))
+ return used;
+
+ if (auto *d = dyn_cast<Defined>(this)) {
+ // Non-absolute symbols might be alive because their section is
+ // no_dead_strip or live_support. In that case, the section will know
+ // that it's live but `used` might be false. Non-absolute symbols always
+ // have to use the section's `live` bit as source of truth.
+ return d->isAbsolute() ? used : d->isec->live;
+ }
+
+ assert(!isa<CommonSymbol>(this) &&
+ "replaceCommonSymbols() runs before dead code stripping, and isLive() "
+ "should only be called after dead code stripping");
+
+ // Assume any other kind of symbol is live.
+ return true;
+}
+
uint64_t Defined::getVA() const {
+ assert(isLive() && "this should only be called for live symbols");
+
if (isAbsolute())
return value;
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 42a9aee116e51..29eaad808cae3 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -51,6 +51,8 @@ class Symbol {
return {nameData, nameSize};
}
+ bool isLive() const;
+
virtual uint64_t getVA() const { return 0; }
virtual uint64_t getFileOffset() const {
@@ -96,7 +98,8 @@ class Symbol {
protected:
Symbol(Kind k, StringRefZ name, InputFile *file)
: symbolKind(k), nameData(name.data), nameSize(name.size), file(file),
- isUsedInRegularObj(!file || isa<ObjFile>(file)) {}
+ isUsedInRegularObj(!file || isa<ObjFile>(file)),
+ used(!config->deadStrip) {}
Kind symbolKind;
const char *nameData;
@@ -105,19 +108,22 @@ class Symbol {
public:
// True if this symbol was referenced by a regular (non-bitcode) object.
- bool isUsedInRegularObj;
+ bool isUsedInRegularObj : 1;
+
+ // True if an undefined or dylib symbol is used from a live section.
+ bool used : 1;
};
class Defined : public Symbol {
public:
Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
- bool isThumb, bool isReferencedDynamically)
+ bool isThumb, bool isReferencedDynamically, bool noDeadStrip)
: Symbol(DefinedKind, name, file), isec(isec), value(value), size(size),
overridesWeakDef(false), privateExtern(isPrivateExtern),
includeInSymtab(true), thumb(isThumb),
- referencedDynamically(isReferencedDynamically), weakDef(isWeakDef),
- external(isExternal) {
+ referencedDynamically(isReferencedDynamically),
+ noDeadStrip(noDeadStrip), weakDef(isWeakDef), external(isExternal) {
if (isec)
isec->numRefs++;
}
@@ -156,7 +162,14 @@ class Defined : public Symbol {
// symbol table by tools like strip. In theory, this could be set on arbitrary
// symbols in input object files. In practice, it's used solely for the
// synthetic __mh_execute_header symbol.
+ // This is information for the static linker, and it's also written to the
+ // output file's symbol table for tools running later (such as `strip`).
bool referencedDynamically : 1;
+ // Set on symbols that should not be removed by dead code stripping.
+ // Set for example on `__attribute__((used))` globals, or on some Objective-C
+ // metadata. This is information only for the static linker and not written
+ // to the output.
+ bool noDeadStrip : 1;
private:
const bool weakDef : 1;
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index a0ed37b3e050b..8179de7780407 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -469,11 +469,15 @@ void StubHelperSection::setup() {
in.got->addEntry(stubBinder);
inputSections.push_back(in.imageLoaderCache);
+ // Since this isn't in the symbol table or in any input file, the noDeadStrip
+ // argument doesn't matter. It's kept alive by ImageLoaderCacheSection()
+ // setting `live` to true on the backing InputSection.
dyldPrivate =
make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0, 0,
/*isWeakDef=*/false,
/*isExternal=*/false, /*isPrivateExtern=*/false,
- /*isThumb=*/false, /*isReferencedDynamically=*/false);
+ /*isThumb=*/false, /*isReferencedDynamically=*/false,
+ /*noDeadStrip=*/false);
}
ImageLoaderCacheSection::ImageLoaderCacheSection() {
@@ -483,6 +487,7 @@ ImageLoaderCacheSection::ImageLoaderCacheSection() {
memset(arr, 0, target->wordSize);
data = {arr, target->wordSize};
align = target->wordSize;
+ live = true;
}
LazyPointerSection::LazyPointerSection()
@@ -571,7 +576,7 @@ void ExportSection::finalizeContents() {
trieBuilder.setImageBase(in.header->addr);
for (const Symbol *sym : symtab->getSymbols()) {
if (const auto *defined = dyn_cast<Defined>(sym)) {
- if (defined->privateExtern)
+ if (defined->privateExtern || !defined->isLive())
continue;
trieBuilder.addSymbol(*defined);
hasWeakSymbol = hasWeakSymbol || sym->isWeakDef();
@@ -590,7 +595,7 @@ void FunctionStartsSection::finalizeContents() {
uint64_t addr = in.header->addr;
for (const Symbol *sym : symtab->getSymbols()) {
if (const auto *defined = dyn_cast<Defined>(sym)) {
- if (!defined->isec || !isCodeSection(defined->isec))
+ if (!defined->isec || !isCodeSection(defined->isec) || !defined->isLive())
continue;
// TODO: Add support for thumbs, in that case
// the lowest bit of nextAddr needs to be set to 1.
@@ -667,6 +672,8 @@ void SymtabSection::emitStabs() {
for (const SymtabEntry &entry :
concat<SymtabEntry>(localSymbols, externalSymbols)) {
Symbol *sym = entry.sym;
+ assert(sym->isLive() &&
+ "dead symbols should not be in localSymbols, externalSymbols");
if (auto *defined = dyn_cast<Defined>(sym)) {
if (defined->isAbsolute())
continue;
@@ -729,12 +736,8 @@ void SymtabSection::finalizeContents() {
for (const InputFile *file : inputFiles) {
if (auto *objFile = dyn_cast<ObjFile>(file)) {
for (Symbol *sym : objFile->symbols) {
- if (sym == nullptr)
- continue;
- // TODO: when we implement -dead_strip, we should filter out symbols
- // that belong to dead sections.
- if (auto *defined = dyn_cast<Defined>(sym)) {
- if (!defined->isExternal()) {
+ if (auto *defined = dyn_cast_or_null<Defined>(sym)) {
+ if (!defined->isExternal() && defined->isLive()) {
StringRef name = defined->getName();
if (!name.startswith("l") && !name.startswith("L"))
addSymbol(localSymbols, sym);
@@ -750,6 +753,8 @@ void SymtabSection::finalizeContents() {
addSymbol(localSymbols, dyldPrivate);
for (Symbol *sym : symtab->getSymbols()) {
+ if (!sym->isLive())
+ continue;
if (auto *defined = dyn_cast<Defined>(sym)) {
if (!defined->includeInSymtab)
continue;
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 5778b9ed9c54a..be77c587f4fa2 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -220,7 +220,7 @@ struct WeakBindingEntry {
// other dylibs should coalesce to.
//
// 2) Weak bindings: These tell dyld that a given symbol reference should
-// coalesce to a non-weak definition if one is found. Note that unlike in the
+// coalesce to a non-weak definition if one is found. Note that unlike the
// entries in the BindingSection, the bindings here only refer to these
// symbols by name, but do not specify which dylib to load them from.
class WeakBindingSection : public LinkEditSection {
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index f684f7649c7a2..f629556b1f3cb 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -89,19 +89,11 @@ using namespace lld::macho;
// compact_unwind_encoding.h for an overview of the format we are encoding
// here.
-// TODO(gkm): prune __eh_frame entries superseded by __unwind_info
+// TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
// TODO(gkm): how do we align the 2nd-level pages?
using EncodingMap = llvm::DenseMap<compact_unwind_encoding_t, size_t>;
-template <class Ptr> struct CompactUnwindEntry {
- Ptr functionAddress;
- uint32_t functionLength;
- compact_unwind_encoding_t encoding;
- Ptr personality;
- Ptr lsda;
-};
-
struct SecondLevelPage {
uint32_t kind;
size_t entryIndex;
@@ -146,6 +138,11 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(InputSection *isec) {
assert(!isec->shouldOmitFromOutput() &&
"__compact_unwind section should not be omitted");
+ // FIXME: This could skip relocations for CompactUnwindEntries that
+ // point to dead-stripped functions. That might save some amount of
+ // work. But since there are usually just few personality functions
+ // that are referenced from many places, at least some of them likely
+ // live, it wouldn't reduce number of got entries.
for (Reloc &r : isec->relocs) {
assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
@@ -177,17 +174,20 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(InputSection *isec) {
}
if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
- assert(!referentIsec->shouldOmitFromOutput());
+ assert(!referentIsec->isCoalescedWeak());
// Personality functions can be referenced via section relocations
// if they live in the same object file. Create placeholder synthetic
// symbols for them in the GOT.
Symbol *&s = personalityTable[{referentIsec, r.addend}];
if (s == nullptr) {
+ // This runs after dead stripping, so the noDeadStrip argument does not
+ // matter.
s = make<Defined>("<internal>", /*file=*/nullptr, referentIsec,
r.addend, /*size=*/0, /*isWeakDef=*/false,
/*isExternal=*/false, /*isPrivateExtern=*/false,
- /*isThumb=*/false, /*isReferencedDynamically=*/false);
+ /*isThumb=*/false, /*isReferencedDynamically=*/false,
+ /*noDeadStrip=*/false);
in.got->addEntry(s);
}
r.referent = s;
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 3f20245b6d5ac..d530cddf1d903 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -20,6 +20,14 @@
namespace lld {
namespace macho {
+template <class Ptr> struct CompactUnwindEntry {
+ Ptr functionAddress;
+ uint32_t functionLength;
+ compact_unwind_encoding_t encoding;
+ Ptr personality;
+ Ptr lsda;
+};
+
class UnwindInfoSection : public SyntheticSection {
public:
bool isNeeded() const override { return compactUnwindSection != nullptr; }
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index da2ae690c45b7..477ccb7617899 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -609,9 +609,10 @@ void Writer::scanSymbols() {
TimeTraceScope timeScope("Scan symbols");
for (const Symbol *sym : symtab->getSymbols()) {
if (const auto *defined = dyn_cast<Defined>(sym)) {
- if (defined->overridesWeakDef)
+ if (defined->overridesWeakDef && defined->isLive())
in.weakBinding->addNonWeakDefinition(defined);
} else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
+ // This branch intentionally doesn't check isLive().
if (dysym->isDynamicLookup())
continue;
dysym->getFile()->refState =
diff --git a/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libc++abi.tbd b/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libc++abi.tbd
index 1be7f3d737ece..2f2e79d01e897 100644
--- a/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libc++abi.tbd
+++ b/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libc++abi.tbd
@@ -6,5 +6,5 @@ install-name: '/usr/lib/libc++abi.dylib'
current-version: 1281
exports:
- archs: [ i386, x86_64, arm64 ]
- symbols: [ ___gxx_personality_v0 ]
+ symbols: [ ___cxa_allocate_exception, ___cxa_begin_catch, ___cxa_end_catch, ___cxa_throw, ___gxx_personality_v0, __ZTIi ]
...
diff --git a/lld/test/MachO/dead-strip.s b/lld/test/MachO/dead-strip.s
new file mode 100644
index 0000000000000..c117d65fd2592
--- /dev/null
+++ b/lld/test/MachO/dead-strip.s
@@ -0,0 +1,737 @@
+# REQUIRES: x86
+
+# RUN: rm -rf %t; split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/basics.s -o %t/basics.o
+
+## Check that .private_extern symbols are marked as local in the symbol table
+## and aren't in the export trie.
+## Dead-stripped symbols should also not be in a map file output.
+# RUN: %lld -lSystem -dead_strip -map %t/map -u _ref_private_extern_u \
+# RUN: %t/basics.o -o %t/basics
+# RUN: llvm-objdump --syms --section-headers %t/basics | \
+# RUN: FileCheck --check-prefix=EXEC --implicit-check-not _unref %s
+# RUN: llvm-objdump --macho --section=__DATA,__ref_section \
+# RUN: --exports-trie --indirect-symbols %t/basics | \
+# RUN: FileCheck --check-prefix=EXECDATA --implicit-check-not _unref %s
+# RUN: llvm-otool -l %t/basics | grep -q 'segname __PAGEZERO'
+# RUN: FileCheck --check-prefix=MAP --implicit-check-not _unref %s < %t/map
+# EXEC-LABEL: Sections:
+# EXEC-LABEL: Name
+# EXEC-NEXT: __text
+# EXEC-NEXT: __got
+# EXEC-NEXT: __ref_section
+# EXEC-NEXT: __common
+# EXEC-LABEL: SYMBOL TABLE:
+# EXEC-NEXT: l {{.*}} _ref_data
+# EXEC-NEXT: l {{.*}} _ref_local
+# EXEC-NEXT: l {{.*}} _ref_from_no_dead_strip_globl
+# EXEC-NEXT: l {{.*}} _no_dead_strip_local
+# EXEC-NEXT: l {{.*}} _ref_from_no_dead_strip_local
+# EXEC-NEXT: l {{.*}} _ref_private_extern_u
+# EXEC-NEXT: l {{.*}} _main
+# EXEC-NEXT: l {{.*}} _ref_private_extern
+# EXEC-NEXT: g {{.*}} _ref_com
+# EXEC-NEXT: g {{.*}} _no_dead_strip_globl
+# EXEC-NEXT: g {{.*}} __mh_execute_header
+# EXECDATA-LABEL: Indirect symbols
+# EXECDATA-NEXT: name
+# EXECDATA-NEXT: _ref_com
+# EXECDATA-LABEL: Contents of (__DATA,__ref_section) section
+# EXECDATA-NEXT: 04 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00
+# EXECDATA-LABEL: Exports trie:
+# EXECDATA-NEXT: __mh_execute_header
+# EXECDATA-NEXT: _ref_com
+# EXECDATA-NEXT: _no_dead_strip_globl
+# MAP: _main
+
+# RUN: %lld -dylib -dead_strip -u _ref_private_extern_u %t/basics.o -o %t/basics.dylib
+# RUN: llvm-objdump --syms %t/basics.dylib | \
+# RUN: FileCheck --check-prefix=DYLIB --implicit-check-not _unref %s
+# RUN: %lld -bundle -dead_strip -u _ref_private_extern_u %t/basics.o -o %t/basics.dylib
+# RUN: llvm-objdump --syms %t/basics.dylib | \
+# RUN: FileCheck --check-prefix=DYLIB --implicit-check-not _unref %s
+# DYLIB-LABEL: SYMBOL TABLE:
+# DYLIB-NEXT: l {{.*}} _ref_data
+# DYLIB-NEXT: l {{.*}} _ref_local
+# DYLIB-NEXT: l {{.*}} _ref_from_no_dead_strip_globl
+# DYLIB-NEXT: l {{.*}} _no_dead_strip_local
+# DYLIB-NEXT: l {{.*}} _ref_from_no_dead_strip_local
+# DYLIB-NEXT: l {{.*}} _ref_private_extern_u
+# DYLIB-NEXT: l {{.*}} _ref_private_extern
+# DYLIB-NEXT: g {{.*}} _ref_com
+# DYLIB-NEXT: g {{.*}} _unref_com
+# DYLIB-NEXT: g {{.*}} _unref_extern
+# DYLIB-NEXT: g {{.*}} _no_dead_strip_globl
+
+## Absolute symbol handling.
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/abs.s -o %t/abs.o
+# RUN: %lld -lSystem -dead_strip %t/abs.o -o %t/abs
+# RUN: llvm-objdump --macho --syms --exports-trie %t/abs | \
+# RUN: FileCheck --check-prefix=ABS %s
+#ABS-LABEL: SYMBOL TABLE:
+#ABS-NEXT: g {{.*}} _main
+#ABS-NEXT: g *ABS* _abs1
+#ABS-NEXT: g {{.*}} __mh_execute_header
+#ABS-LABEL: Exports trie:
+#ABS-NEXT: __mh_execute_header
+#ABS-NEXT: _main
+#ABS-NEXT: _abs1 [absolute]
+
+## Check that symbols from -exported_symbol(s_list) are preserved.
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/exported-symbol.s -o %t/exported-symbol.o
+# RUN: %lld -lSystem -dead_strip -exported_symbol _my_exported_symbol \
+# RUN: %t/exported-symbol.o -o %t/exported-symbol
+# RUN: llvm-objdump --syms %t/exported-symbol | \
+# RUN: FileCheck --check-prefix=EXPORTEDSYMBOL --implicit-check-not _unref %s
+# EXPORTEDSYMBOL-LABEL: SYMBOL TABLE:
+# EXPORTEDSYMBOL-NEXT: l {{.*}} _main
+# EXPORTEDSYMBOL-NEXT: l {{.*}} __mh_execute_header
+# EXPORTEDSYMBOL-NEXT: g {{.*}} _my_exported_symbol
+
+## Check that mod_init_funcs and mod_term_funcs are not stripped.
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/mod-funcs.s -o %t/mod-funcs.o
+# RUN: %lld -lSystem -dead_strip %t/mod-funcs.o -o %t/mod-funcs
+# RUN: llvm-objdump --syms %t/mod-funcs | \
+# RUN: FileCheck --check-prefix=MODFUNCS --implicit-check-not _unref %s
+# MODFUNCS-LABEL: SYMBOL TABLE:
+# MODFUNCS-NEXT: l {{.*}} _ref_from_init
+# MODFUNCS-NEXT: l {{.*}} _ref_init
+# MODFUNCS-NEXT: l {{.*}} _ref_from_term
+# MODFUNCS-NEXT: l {{.*}} _ref_term
+# MODFUNCS-NEXT: g {{.*}} _main
+# MODFUNCS-NEXT: g {{.*}} __mh_execute_header
+
+## Check that DylibSymbols in dead subsections are stripped: They should
+## not be in the import table and should have no import stubs.
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/dylib.s -o %t/dylib.o
+# RUN: %lld -dylib -dead_strip %t/dylib.o -o %t/dylib.dylib
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/strip-dylib-ref.s -o %t/strip-dylib-ref.o
+# RUN: %lld -lSystem -dead_strip %t/strip-dylib-ref.o %t/dylib.dylib \
+# RUN: -o %t/strip-dylib-ref -U _ref_undef_fun -U _unref_undef_fun
+# RUN: llvm-objdump --syms --bind --lazy-bind --weak-bind %t/strip-dylib-ref | \
+# RUN: FileCheck --check-prefix=STRIPDYLIB --implicit-check-not _unref %s
+# STRIPDYLIB: SYMBOL TABLE:
+# STRIPDYLIB-NEXT: l {{.*}} __dyld_private
+# STRIPDYLIB-NEXT: g {{.*}} _main
+# STRIPDYLIB-NEXT: g {{.*}} __mh_execute_header
+# STRIPDYLIB-NEXT: *UND* _ref_undef_fun
+# STRIPDYLIB-NEXT: *UND* dyld_stub_binder
+# STRIPDYLIB-NEXT: *UND* _ref_dylib_fun
+# STRIPDYLIB: Bind table:
+# STRIPDYLIB: Lazy bind table:
+# STRIPDYLIB: __DATA __la_symbol_ptr {{.*}} flat-namespace _ref_undef_fun
+# STRIPDYLIB: __DATA __la_symbol_ptr {{.*}} dylib _ref_dylib_fun
+# STRIPDYLIB: Weak bind table:
+## Stubs smoke check: There should be two stubs entries, not four, but we
+## don't verify that they belong to _ref_undef_fun and _ref_dylib_fun.
+# RUN: llvm-objdump -d --section=__stubs --section=__stub_helper \
+# RUN: %t/strip-dylib-ref |FileCheck --check-prefix=STUBS %s
+# STUBS-LABEL: <__stubs>:
+# STUBS-NEXT: jmpq
+# STUBS-NEXT: jmpq
+# STUBS-NOT: jmpq
+# STUBS-LABEL: <__stub_helper>:
+# STUBS: pushq $0
+# STUBS: jmp
+# STUBS: jmp
+# STUBS-NOT: jmp
+## An undefined symbol referenced from a dead-stripped function shouldn't
+## produce a diagnostic:
+# RUN: %lld -lSystem -dead_strip %t/strip-dylib-ref.o %t/dylib.dylib \
+# RUN: -o %t/strip-dylib-ref -U _ref_undef_fun
+
+## S_ATTR_LIVE_SUPPORT tests.
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/live-support.s -o %t/live-support.o
+# RUN: %lld -lSystem -dead_strip %t/live-support.o %t/dylib.dylib \
+# RUN: -U _ref_undef_fun -U _unref_undef_fun -o %t/live-support
+# RUN: llvm-objdump --syms %t/live-support | \
+# RUN: FileCheck --check-prefix=LIVESUPP --implicit-check-not _unref %s
+# LIVESUPP-LABEL: SYMBOL TABLE:
+# LIVESUPP-NEXT: l {{.*}} _ref_ls_fun_fw
+# LIVESUPP-NEXT: l {{.*}} _ref_ls_fun_bw
+# LIVESUPP-NEXT: l {{.*}} _ref_ls_dylib_fun
+# LIVESUPP-NEXT: l {{.*}} _ref_ls_undef_fun
+# LIVESUPP-NEXT: l {{.*}} __dyld_private
+# LIVESUPP-NEXT: g {{.*}} _main
+# LIVESUPP-NEXT: g {{.*}} _bar
+# LIVESUPP-NEXT: g {{.*}} _foo
+# LIVESUPP-NEXT: g {{.*}} __mh_execute_header
+# LIVESUPP-NEXT: *UND* _ref_undef_fun
+# LIVESUPP-NEXT: *UND* dyld_stub_binder
+# LIVESUPP-NEXT: *UND* _ref_dylib_fun
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/live-support-iterations.s -o %t/live-support-iterations.o
+# RUN: %lld -lSystem -dead_strip %t/live-support-iterations.o \
+# RUN: -o %t/live-support-iterations
+# RUN: llvm-objdump --syms %t/live-support-iterations | \
+# RUN: FileCheck --check-prefix=LIVESUPP2 --implicit-check-not _unref %s
+# LIVESUPP2-LABEL: SYMBOL TABLE:
+# LIVESUPP2-NEXT: l {{.*}} _bar
+# LIVESUPP2-NEXT: l {{.*}} _foo_refd
+# LIVESUPP2-NEXT: l {{.*}} _bar_refd
+# LIVESUPP2-NEXT: l {{.*}} _baz
+# LIVESUPP2-NEXT: l {{.*}} _baz_refd
+# LIVESUPP2-NEXT: l {{.*}} _foo
+# LIVESUPP2-NEXT: g {{.*}} _main
+# LIVESUPP2-NEXT: g {{.*}} __mh_execute_header
+
+## Dead stripping should not remove the __TEXT,__unwind_info
+## and __TEXT,__gcc_except_tab functions, but it should still
+## remove the unreferenced function __Z5unref.
+## The reference to ___gxx_personality_v0 should also not be
+## stripped.
+## (Need to use darwin19.0.0 to make -mc emit __LD,__compact_unwind.)
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 \
+# RUN: %t/unwind.s -o %t/unwind.o
+# RUN: %lld -lc++ -lSystem -dead_strip %t/unwind.o -o %t/unwind
+# RUN: llvm-objdump --syms %t/unwind | \
+# RUN: FileCheck --check-prefix=UNWIND --implicit-check-not unref %s
+# RUN: llvm-otool -l %t/unwind | grep -q 'sectname __unwind_info'
+# RUN: llvm-otool -l %t/unwind | grep -q 'sectname __gcc_except_tab'
+# UNWIND-LABEL: SYMBOL TABLE:
+# UNWIND-NEXT: l O __TEXT,__gcc_except_tab GCC_except_table1
+# UNWIND-NEXT: l O __DATA,__data __dyld_private
+# UNWIND-NEXT: g F __TEXT,__text _main
+# UNWIND-NEXT: g F __TEXT,__text __mh_execute_header
+# UNWIND-NEXT: *UND* ___cxa_allocate_exception
+# UNWIND-NEXT: *UND* ___cxa_end_catch
+# UNWIND-NEXT: *UND* __ZTIi
+# UNWIND-NEXT: *UND* ___cxa_throw
+# UNWIND-NEXT: *UND* ___gxx_personality_v0
+# UNWIND-NEXT: *UND* ___cxa_begin_catch
+# UNWIND-NEXT: *UND* dyld_stub_binder
+
+## If a dead stripped function has a strong ref to a dylib symbol but
+## a live function only a weak ref, the dylib is still not a WEAK_DYLIB.
+## This matches ld64.
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/weak-ref.s -o %t/weak-ref.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/strong-dead-ref.s -o %t/strong-dead-ref.o
+# RUN: %lld -lSystem -dead_strip %t/weak-ref.o %t/strong-dead-ref.o \
+# RUN: %t/dylib.dylib -o %t/weak-ref
+# RUN: llvm-otool -l %t/weak-ref | FileCheck -DDIR=%t --check-prefix=WEAK %s
+# WEAK: cmd LC_LOAD_DYLIB
+# WEAK-NEXT: cmdsize
+# WEAK-NEXT: name /usr/lib/libSystem.dylib
+# WEAK: cmd LC_LOAD_DYLIB
+# WEAK-NEXT: cmdsize
+# WEAK-NEXT: name [[DIR]]/dylib.dylib
+
+## A strong symbol that would override a weak import does not emit the
+## "this overrides a weak import" opcode if it is dead-stripped.
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/weak-dylib.s -o %t/weak-dylib.o
+# RUN: %lld -dylib -dead_strip %t/weak-dylib.o -o %t/weak-dylib.dylib
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/dead-weak-override.s -o %t/dead-weak-override.o
+# RUN: %lld -dead_strip %t/dead-weak-override.o %t/weak-dylib.dylib \
+# RUN: -o %t/dead-weak-override
+# RUN: llvm-objdump --macho --weak-bind --private-header \
+# RUN: %t/dead-weak-override | FileCheck --check-prefix=DEADWEAK %s
+# DEADWEAK-NOT: WEAK_DEFINES
+# DEADWEAK: Weak bind table:
+# DEADWEAK: segment section address type addend symbol
+# DEADWEAK-NOT: strong _weak_in_dylib
+
+## Stripped symbols should not be in the debug info stabs entries.
+# RUN: llvm-mc -g -filetype=obj -triple=x86_64-apple-macos \
+# RUN: %t/debug.s -o %t/debug.o
+# RUN: %lld -lSystem -dead_strip %t/debug.o -o %t/debug
+# RUN: dsymutil -s %t/debug | FileCheck --check-prefix=EXECSTABS %s
+# EXECSTABS-NOT: N_FUN {{.*}} '_unref'
+# EXECSTABS: N_FUN {{.*}} '_main'
+# EXECSTABS-NOT: N_FUN {{.*}} '_unref'
+
+#--- basics.s
+.comm _ref_com, 1
+.comm _unref_com, 1
+
+.section __DATA,__unref_section
+_unref_data:
+ .quad 4
+
+l_unref_data:
+ .quad 5
+
+## Referenced by no_dead_strip == S_ATTR_NO_DEAD_STRIP
+.section __DATA,__ref_section,regular,no_dead_strip
+
+## Referenced because in no_dead_strip section.
+_ref_data:
+ .quad 4
+
+## This is a local symbol so it's not in the symbol table, but
+## it is still in the section data.
+l_ref_data:
+ .quad 5
+
+.text
+
+# Exported symbols should not be stripped from dylibs
+# or bundles, but they should be stripped from executables.
+.globl _unref_extern
+_unref_extern:
+ callq _ref_local
+ retq
+
+# Unreferenced local symbols should be stripped.
+_unref_local:
+ retq
+
+# Same for unreferenced private externs.
+.globl _unref_private_extern
+.private_extern _unref_private_extern
+_unref_private_extern:
+ # This shouldn't create an indirect symbol since it's
+ # a reference from a dead function.
+ movb _unref_com@GOTPCREL(%rip), %al
+ retq
+
+# Referenced local symbols should not be stripped.
+_ref_local:
+ callq _ref_private_extern
+ retq
+
+# Same for referenced private externs.
+# This one is referenced by a relocation.
+.globl _ref_private_extern
+.private_extern _ref_private_extern
+_ref_private_extern:
+ retq
+
+# This one is referenced by a -u flag.
+.globl _ref_private_extern_u
+.private_extern _ref_private_extern_u
+_ref_private_extern_u:
+ retq
+
+# Entry point should not be stripped for executables, even if hidden.
+# For shared libraries this is stripped since it's just a regular hidden
+# symbol there.
+.globl _main
+.private_extern _main
+_main:
+ movb _ref_com@GOTPCREL(%rip), %al
+ callq _ref_local
+ retq
+
+# Things marked no_dead_strip should not be stripped either.
+# (clang emits this e.g. for `__attribute__((used))` globals.)
+# Both for .globl symbols...
+.globl _no_dead_strip_globl
+.no_dead_strip _no_dead_strip_globl
+_no_dead_strip_globl:
+ callq _ref_from_no_dead_strip_globl
+ retq
+_ref_from_no_dead_strip_globl:
+ retq
+
+# ...and for locals.
+.no_dead_strip _no_dead_strip_local
+_no_dead_strip_local:
+ callq _ref_from_no_dead_strip_local
+ retq
+_ref_from_no_dead_strip_local:
+ retq
+
+.subsections_via_symbols
+
+#--- exported-symbol.s
+.text
+
+.globl _unref_symbol
+_unref_symbol:
+ retq
+
+.globl _my_exported_symbol
+_my_exported_symbol:
+ retq
+
+.globl _main
+_main:
+ retq
+
+.subsections_via_symbols
+
+#--- abs.s
+.globl _abs1, _abs2, _abs3
+
+.no_dead_strip _abs1
+_abs1 = 1
+_abs2 = 2
+_abs3 = 3
+
+.section __DATA,__foo,regular,no_dead_strip
+# Absolute symbols are not in a section, so the no_dead_strip
+# on the section above has no effect.
+.globl _abs4
+_abs4 = 4
+
+.text
+.globl _main
+_main:
+ # This is relaxed away, so there's no relocation here and
+ # _abs3 isn't in the exported symbol table.
+ mov _abs3, %rax
+ retq
+
+.subsections_via_symbols
+
+#--- mod-funcs.s
+## Roughly based on `clang -O2 -S` output for `struct A { A(); ~A(); }; A a;`
+## for mod_init_funcs. mod_term_funcs then similar to that.
+.section __TEXT,__StaticInit,regular,pure_instructions
+
+__unref:
+ retq
+
+_ref_from_init:
+ retq
+
+_ref_init:
+ callq _ref_from_init
+ retq
+
+_ref_from_term:
+ retq
+
+_ref_term:
+ callq _ref_from_term
+ retq
+
+.globl _main
+_main:
+ retq
+
+.section __DATA,__mod_init_func,mod_init_funcs
+.quad _ref_init
+
+.section __DATA,__mod_term_func,mod_term_funcs
+.quad _ref_term
+
+.subsections_via_symbols
+
+#--- dylib.s
+.text
+
+.globl _ref_dylib_fun
+_ref_dylib_fun:
+ retq
+
+.globl _unref_dylib_fun
+_unref_dylib_fun:
+ retq
+
+.subsections_via_symbols
+
+#--- strip-dylib-ref.s
+.text
+
+_unref:
+ callq _ref_dylib_fun
+ callq _unref_dylib_fun
+ callq _ref_undef_fun
+ callq _unref_undef_fun
+ retq
+
+.globl _main
+_main:
+ callq _ref_dylib_fun
+ callq _ref_undef_fun
+ retq
+
+.subsections_via_symbols
+
+#--- live-support.s
+## In practice, live_support is used for instruction profiling
+## data and asan. (Also for __eh_frame, but that needs special handling
+## in the linker anyways.)
+## This test isn't based on anything happening in real code though.
+.section __TEXT,__ref_ls_fw,regular,live_support
+_ref_ls_fun_fw:
+ # This is called by _main and is kept alive by normal
+ # forward liveness propagation. The live_support attribute
+ # does nothing in this case.
+ retq
+
+.section __TEXT,__unref_ls_fw,regular,live_support
+_unref_ls_fun_fw:
+ retq
+
+.section __TEXT,__ref_ls_bw,regular,live_support
+_ref_ls_fun_bw:
+ # This _calls_ something that's alive but isn't referenced itself. This is
+ # kept alive only due to this being in a live_support section.
+ callq _foo
+
+ # _bar on the other hand is kept alive since it's called from here.
+ callq _bar
+ retq
+
+## Kept alive by a live symbol from a dynamic library.
+_ref_ls_dylib_fun:
+ callq _ref_dylib_fun
+ retq
+
+## Kept alive by a live undefined symbol.
+_ref_ls_undef_fun:
+ callq _ref_undef_fun
+ retq
+
+## All symbols in this live_support section reference dead symbols
+## and are hence dead themselves.
+.section __TEXT,__unref_ls_bw,regular,live_support
+_unref_ls_fun_bw:
+ callq _unref
+ retq
+
+_unref_ls_dylib_fun_bw:
+ callq _unref_dylib_fun
+ retq
+
+_unref_ls_undef_fun_bw:
+ callq _unref_undef_fun
+ retq
+
+.text
+.globl _unref
+_unref:
+ retq
+
+.globl _bar
+_bar:
+ retq
+
+.globl _foo
+_foo:
+ callq _ref_ls_fun_fw
+ retq
+
+.globl _main
+_main:
+ callq _ref_ls_fun_fw
+ callq _foo
+ callq _ref_dylib_fun
+ callq _ref_undef_fun
+ retq
+
+.subsections_via_symbols
+
+#--- live-support-iterations.s
+.section __TEXT,_ls,regular,live_support
+
+## This is a live_support subsection that only becomes
+## live after _foo below is processed. This means the algorithm of
+## 1. mark things reachable from gc roots live
+## 2. go through live sections and mark the ones live pointing to
+## live symbols or sections
+## needs more than one iteration, since _bar won't be live when step 2
+## runs for the first time.
+## (ld64 gets this wrong -- it has different output based on if _bar is
+## before _foo or after it.)
+_bar:
+ callq _foo_refd
+ callq _bar_refd
+ retq
+
+## Same here. This is maybe more interesting since it references a live_support
+## symbol instead of a "normal" symbol.
+_baz:
+ callq _foo_refd
+ callq _baz_refd
+ retq
+
+_foo:
+ callq _main
+ callq _foo_refd
+ retq
+
+## Test no_dead_strip on a symbol in a live_support section.
+## ld64 ignores this, but that doesn't look intentional. So lld honors it.
+.no_dead_strip
+_quux:
+ retq
+
+
+.text
+.globl _main
+_main:
+ movq $0, %rax
+ retq
+
+_foo_refd:
+ retq
+
+_bar_refd:
+ retq
+
+_baz_refd:
+ retq
+
+.subsections_via_symbols
+
+#--- unwind.s
+## This is the output of `clang -O2 -S throw.cc` where throw.cc
+## looks like this:
+## void unref() {}
+## int main() {
+## try {
+## throw 0;
+## } catch (int i) {
+## return i;
+## }
+## }
+.section __TEXT,__text,regular,pure_instructions
+
+.globl __Z5unrefv
+.p2align 4, 0x90
+__Z5unrefv:
+.cfi_startproc
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ popq %rbp
+ retq
+ .cfi_endproc
+
+.globl _main
+.p2align 4, 0x90
+_main:
+Lfunc_begin0:
+ .cfi_startproc
+ .cfi_personality 155, ___gxx_personality_v0
+ .cfi_lsda 16, Lexception0
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ pushq %rbx
+ pushq %rax
+ .cfi_offset %rbx, -24
+ movl $4, %edi
+ callq ___cxa_allocate_exception
+ movl $0, (%rax)
+Ltmp0:
+ movq __ZTIi@GOTPCREL(%rip), %rsi
+ movq %rax, %rdi
+ xorl %edx, %edx
+ callq ___cxa_throw
+Ltmp1:
+ ud2
+LBB1_2:
+Ltmp2:
+ movq %rax, %rdi
+ callq ___cxa_begin_catch
+ movl (%rax), %ebx
+ callq ___cxa_end_catch
+ movl %ebx, %eax
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ retq
+Lfunc_end0:
+.cfi_endproc
+
+.section __TEXT,__gcc_except_tab
+.p2align 2
+GCC_except_table1:
+Lexception0:
+ .byte 255 ## @LPStart Encoding = omit
+ .byte 155 ## @TType Encoding = indirect pcrel sdata4
+ .uleb128 Lttbase0-Lttbaseref0
+Lttbaseref0:
+ .byte 1 ## Call site Encoding = uleb128
+ .uleb128 Lcst_end0-Lcst_begin0
+Lcst_begin0:
+ .uleb128 Lfunc_begin0-Lfunc_begin0 ## >> Call Site 1 <<
+ .uleb128 Ltmp0-Lfunc_begin0 ## Call between Lfunc_begin0 and Ltmp0
+ .byte 0 ## has no landing pad
+ .byte 0 ## On action: cleanup
+ .uleb128 Ltmp0-Lfunc_begin0 ## >> Call Site 2 <<
+ .uleb128 Ltmp1-Ltmp0 ## Call between Ltmp0 and Ltmp1
+ .uleb128 Ltmp2-Lfunc_begin0 ## jumps to Ltmp2
+ .byte 1 ## On action: 1
+ .uleb128 Ltmp1-Lfunc_begin0 ## >> Call Site 3 <<
+ .uleb128 Lfunc_end0-Ltmp1 ## Call between Ltmp1 and Lfunc_end0
+ .byte 0 ## has no landing pad
+ .byte 0 ## On action: cleanup
+Lcst_end0:
+ .byte 1 ## >> Action Record 1 <<
+ ## Catch TypeInfo 1
+ .byte 0 ## No further actions
+ .p2align 2
+ ## >> Catch TypeInfos <<
+ .long __ZTIi@GOTPCREL+4 ## TypeInfo 1
+Lttbase0:
+ .p2align 2
+ ## -- End function
+.subsections_via_symbols
+
+#--- weak-ref.s
+.text
+.weak_reference _ref_dylib_fun
+.globl _main
+_main:
+ callq _ref_dylib_fun
+ retq
+
+.subsections_via_symbols
+
+#--- strong-dead-ref.s
+.text
+.globl _unref_dylib_fun
+_unref:
+ callq _unref_dylib_fun
+ retq
+
+.subsections_via_symbols
+
+#--- weak-dylib.s
+.text
+.globl _weak_in_dylib
+.weak_definition _weak_in_dylib
+_weak_in_dylib:
+ retq
+
+.subsections_via_symbols
+
+#--- dead-weak-override.s
+
+## Overrides the _weak_in_dylib symbol in weak-dylib, but is dead stripped.
+.text
+
+#.no_dead_strip _weak_in_dylib
+.globl _weak_in_dylib
+_weak_in_dylib:
+ retq
+
+.globl _main
+_main:
+ retq
+
+.subsections_via_symbols
+
+#--- debug.s
+.text
+.globl _unref
+_unref:
+ retq
+
+.globl _main
+_main:
+ retq
+
+.subsections_via_symbols
diff --git a/lld/test/MachO/mh-header-link.s b/lld/test/MachO/mh-header-link.s
index 5c313e24117a3..80b8f47928b5c 100644
--- a/lld/test/MachO/mh-header-link.s
+++ b/lld/test/MachO/mh-header-link.s
@@ -9,7 +9,7 @@
## (but not in other types of files)
# RUN: llvm-mc %t/dylib.s -triple=x86_64-apple-macos10.15 -filetype=obj -o %t/dylib.o
-# RUN: %lld -pie -dylib %t/dylib.o -o %t/dylib.out
+# RUN: %lld -pie -dylib -dead_strip %t/dylib.o -o %t/dylib.out
# RUN: llvm-objdump -m --syms %t/dylib.out | FileCheck %s --check-prefix DYLIB
# RUN: not %lld -pie -o /dev/null %t/dylib.o 2>&1 | FileCheck %s --check-prefix ERR-DYLIB
@@ -21,7 +21,7 @@
## Test that in an executable, we can link against __mh_execute_header
# RUN: llvm-mc %t/main.s -triple=x86_64-apple-macos10.15 -filetype=obj -o %t/exec.o
-# RUN: %lld -pie %t/exec.o -o %t/exec.out
+# RUN: %lld -pie -dead_strip -lSystem %t/exec.o -o %t/exec.out
## But it would be an error trying to reference __mh_execute_header in a dylib
# RUN: not %lld -pie -o /dev/null -dylib %t/exec.o 2>&1 | FileCheck %s --check-prefix ERR-EXEC
@@ -34,6 +34,7 @@
_main:
mov __mh_execute_header@GOTPCREL(%rip), %rax
ret
+.subsections_via_symbols
#--- dylib.s
.text
@@ -41,3 +42,4 @@ _main:
_main:
mov __mh_dylib_header@GOTPCREL(%rip), %rax
ret
+.subsections_via_symbols
diff --git a/lld/test/MachO/sectcreate.s b/lld/test/MachO/sectcreate.s
index 2934bf7865012..03847e1395b79 100644
--- a/lld/test/MachO/sectcreate.s
+++ b/lld/test/MachO/sectcreate.s
@@ -10,6 +10,16 @@
# RUN: -o %t %t.o
# RUN: llvm-objdump -s %t | FileCheck %s
+## -dead_strip does not strip -sectcreate sections,
+## but also doesn't set S_ATTR_NO_DEAD_STRIP on them.
+# RUN: %lld -dead_strip \
+# RUN: -sectcreate SEG SEC1 %t1 \
+# RUN: -segcreate SEG SEC2 %t3 \
+# RUN: -sectcreate SEG SEC1 %t2 \
+# RUN: -o %t %t.o
+# RUN: llvm-objdump -s %t | FileCheck --check-prefix=STRIPPED %s
+# RUN: llvm-readobj --sections %t | FileCheck --check-prefix=STRIPPEDSEC %s
+
# CHECK: Contents of section __TEXT,__text:
# CHECK: Contents of section __DATA,__data:
# CHECK: my string!.
@@ -19,6 +29,17 @@
# CHECK: Contents of section SEG,SEC2:
# CHECK: -sectcreate 2.
+# STRIPPED: Contents of section __TEXT,__text:
+# STRIPPED-NOT: Contents of section __DATA,__data:
+# STRIPPED-NOT: my string!.
+# STRIPPED: Contents of section SEG,SEC1:
+# STRIPPED: -sectcreate 1.1.
+# STRIPPED: -sectcreate 1.2.
+# STRIPPED: Contents of section SEG,SEC2:
+# STRIPPED: -sectcreate 2.
+
+# STRIPPEDSEC-NOT: NoDeadStrip
+
.text
.global _main
_main:
@@ -29,3 +50,5 @@ _main:
.global my_string
my_string:
.string "my string!"
+
+.subsections_via_symbols
diff --git a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
index 29e10d6a309e6..67cf249173471 100644
--- a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
@@ -36,6 +36,7 @@ static_library("MachO2") {
"InputSection.cpp",
"LTO.cpp",
"MapFile.cpp",
+ "MarkLive.cpp",
"ObjC.cpp",
"OutputSection.cpp",
"OutputSegment.cpp",
More information about the llvm-commits
mailing list