[lld] [lld][ELF] Add --why-live flag (inspired by Mach-O) (PR #127112)
Daniel Thornburgh via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 13 11:23:03 PST 2025
https://github.com/mysterymath updated https://github.com/llvm/llvm-project/pull/127112
>From 0f1b04312090e4ac35f2adbd5f789e9c19af8929 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 13 Nov 2024 15:12:55 -0800
Subject: [PATCH] [lld][ELF] Add --why-live flag (inspired by Mach-O)
This prints the stack of reasons that symbols that match the given
glob(s) survived GC. It has no effect unless section GC occurs.
A symbol may be live intrinsically, because it is referenced by another
symbol or section, or because it is part of a live section. Sections have
similar reasons.
This implementation does not require -ffunction-sections or
-fdata-sections to produce readable results, although it does tend to
work better with them (as does GC).
---
lld/ELF/Config.h | 1 +
lld/ELF/Driver.cpp | 9 +++
lld/ELF/MarkLive.cpp | 125 +++++++++++++++++++++++++++++++++----
lld/ELF/Options.td | 6 ++
lld/test/ELF/why-live.s | 132 ++++++++++++++++++++++++++++++++++++++++
5 files changed, 260 insertions(+), 13 deletions(-)
create mode 100644 lld/test/ELF/why-live.s
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index b2859486d58e9..12164f5999343 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -223,6 +223,7 @@ struct Config {
llvm::StringRef thinLTOCacheDir;
llvm::StringRef thinLTOIndexOnlyArg;
llvm::StringRef whyExtract;
+ llvm::SmallVector<llvm::GlobPattern, 0> whyLive;
llvm::StringRef cmseInputLib;
llvm::StringRef cmseOutputLib;
StringRef zBtiReport = "none";
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 13e8f8ce6df20..db0b2ea8afcf0 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1472,6 +1472,15 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
ctx.arg.warnSymbolOrdering =
args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
ctx.arg.whyExtract = args.getLastArgValue(OPT_why_extract);
+ for (opt::Arg *arg : args.filtered(OPT_why_live)) {
+ StringRef value(arg->getValue());
+ if (Expected<GlobPattern> pat = GlobPattern::create(arg->getValue())) {
+ ctx.arg.whyLive.emplace_back(std::move(*pat));
+ } else {
+ ErrAlways(ctx) << arg->getSpelling() << ": " << pat.takeError();
+ continue;
+ }
+ }
ctx.arg.zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
ctx.arg.zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
ctx.arg.zForceBti = hasZOption(args, "force-bti");
diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index b6c22884d9176..8e9e385bc26dc 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -29,9 +29,11 @@
#include "Target.h"
#include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/Strings.h"
+#include "llvm/ADT/DenseMapInfoVariant.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/TimeProfiler.h"
+#include <variant>
#include <vector>
using namespace llvm;
@@ -42,6 +44,10 @@ using namespace lld;
using namespace lld::elf;
namespace {
+
+// Something that can be the most proximate reason that something else is alive.
+typedef std::variant<InputSectionBase *, Symbol *> LiveReason;
+
template <class ELFT> class MarkLive {
public:
MarkLive(Ctx &ctx, unsigned partition) : ctx(ctx), partition(partition) {}
@@ -50,7 +56,10 @@ template <class ELFT> class MarkLive {
void moveToMain();
private:
- void enqueue(InputSectionBase *sec, uint64_t offset);
+ void enqueue(InputSectionBase *sec, uint64_t offset = 0,
+ Symbol *sym = nullptr,
+ std::optional<LiveReason> reason = std::nullopt);
+ void printWhyLive(Symbol *s) const;
void markSymbol(Symbol *sym);
void mark();
@@ -70,6 +79,12 @@ template <class ELFT> class MarkLive {
// There are normally few input sections whose names are valid C
// identifiers, so we just store a SmallVector instead of a multimap.
DenseMap<StringRef, SmallVector<InputSectionBase *, 0>> cNamedSections;
+
+ // The most proximate reason that something is live. If something doesn't have
+ // a recorded reason, it is either dead, intrinsically live, or an
+ // unreferenced symbol in a live section. (These cases are trivially
+ // detectable and need not be stored.)
+ DenseMap<LiveReason, LiveReason> whyLive;
};
} // namespace
@@ -101,6 +116,12 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
Symbol &sym = sec.file->getRelocTargetSym(rel);
sym.used = true;
+ LiveReason reason;
+ if (!ctx.arg.whyLive.empty()) {
+ Defined *reasonSym = sec.getEnclosingSymbol(rel.r_offset);
+ reason = reasonSym ? LiveReason(reasonSym) : LiveReason(&sec);
+ }
+
if (auto *d = dyn_cast<Defined>(&sym)) {
auto *relSec = dyn_cast_or_null<InputSectionBase>(d->section);
if (!relSec)
@@ -119,17 +140,29 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
// group/SHF_LINK_ORDER rules (b) if the associated text section should be
// discarded, marking the LSDA will unnecessarily retain the text section.
if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) ||
- relSec->nextInSectionGroup)))
- enqueue(relSec, offset);
+ relSec->nextInSectionGroup))) {
+ Symbol *canonicalSym = d;
+ if (!ctx.arg.whyLive.empty() && d->isSection()) {
+ if (Symbol *s = relSec->getEnclosingSymbol(offset))
+ canonicalSym = s;
+ else
+ canonicalSym = nullptr;
+ }
+ enqueue(relSec, offset, canonicalSym, reason);
+ }
return;
}
- if (auto *ss = dyn_cast<SharedSymbol>(&sym))
- if (!ss->isWeak())
+ if (auto *ss = dyn_cast<SharedSymbol>(&sym)) {
+ if (!ss->isWeak()) {
cast<SharedFile>(ss->file)->isNeeded = true;
+ if (!ctx.arg.whyLive.empty())
+ whyLive.try_emplace(&sym, reason);
+ }
+ }
for (InputSectionBase *sec : cNamedSections.lookup(sym.getName()))
- enqueue(sec, 0);
+ enqueue(sec, 0, nullptr, reason);
}
// The .eh_frame section is an unfortunate special case.
@@ -187,7 +220,8 @@ static bool isReserved(InputSectionBase *sec) {
}
template <class ELFT>
-void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) {
+void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
+ Symbol *sym, std::optional<LiveReason> reason) {
// Usually, a whole section is marked as live or dead, but in mergeable
// (splittable) sections, each piece of data has independent liveness bit.
// So we explicitly tell it which offset is in use.
@@ -201,15 +235,71 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) {
return;
sec->partition = sec->partition ? 1 : partition;
+ if (!ctx.arg.whyLive.empty() && reason) {
+ if (sym) {
+ // If a specific symbol is referenced, that makes it alive. It may in turn
+ // make its section alive.
+ whyLive.try_emplace(sym, *reason);
+ whyLive.try_emplace(sec, sym);
+ } else {
+ // Otherwise, the reference generically makes the section live.
+ whyLive.try_emplace(sec, *reason);
+ }
+ }
+
// Add input section to the queue.
if (InputSection *s = dyn_cast<InputSection>(sec))
queue.push_back(s);
}
+// Print the stack of reasons that the given symbol is live.
+template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
+ // Skip dead symbols. A symbol is dead if it belongs to a dead section.
+ if (auto *d = dyn_cast<Defined>(s)) {
+ auto *reason = dyn_cast_or_null<InputSectionBase>(d->section);
+ if (reason && !reason->isLive())
+ return;
+ }
+
+ auto msg = Msg(ctx);
+ msg << "live symbol: " << toStr(ctx, *s);
+
+ LiveReason cur = s;
+ while (true) {
+ auto it = whyLive.find(cur);
+ // If there is a specific reason this object is live...
+ if (it != whyLive.end()) {
+ cur = it->second;
+ } else {
+ // This object is live, but it has no tracked reason. It is either
+ // intrinsically live or an unreferenced symbol in a live section. Return
+ // in the first case.
+ if (!std::holds_alternative<Symbol *>(cur))
+ return;
+ auto *d = dyn_cast<Defined>(std::get<Symbol *>(cur));
+ if (!d)
+ return;
+ auto *reason = dyn_cast_or_null<InputSectionBase>(d->section);
+ if (!reason)
+ return;
+ cur = LiveReason{reason};
+ }
+
+ msg << "\n>>> kept live by ";
+ if (std::holds_alternative<Symbol *>(cur)) {
+ auto *s = std::get<Symbol *>(cur);
+ msg << toStr(ctx, *s);
+ } else {
+ auto *s = std::get<InputSectionBase *>(cur);
+ msg << toStr(ctx, s);
+ }
+ }
+}
+
template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {
if (auto *d = dyn_cast_or_null<Defined>(sym))
if (auto *isec = dyn_cast_or_null<InputSectionBase>(d->section))
- enqueue(isec, d->value);
+ enqueue(isec, d->value, sym);
}
// This is the main function of the garbage collector.
@@ -256,7 +346,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
}
for (InputSectionBase *sec : ctx.inputSections) {
if (sec->flags & SHF_GNU_RETAIN) {
- enqueue(sec, 0);
+ enqueue(sec, 0, nullptr, std::nullopt);
continue;
}
if (sec->flags & SHF_LINK_ORDER)
@@ -295,7 +385,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
// Preserve special sections and those which are specified in linker
// script KEEP command.
if (isReserved(sec) || ctx.script->shouldKeep(sec)) {
- enqueue(sec, 0);
+ enqueue(sec);
} else if ((!ctx.arg.zStartStopGC || sec->name.starts_with("__libc_")) &&
isValidCIdentifier(sec->name)) {
// As a workaround for glibc libc.a before 2.34
@@ -323,11 +413,20 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
resolveReloc(sec, rel, false);
for (InputSectionBase *isec : sec.dependentSections)
- enqueue(isec, 0);
+ enqueue(isec, 0, nullptr, &sec);
// Mark the next group member.
if (sec.nextInSectionGroup)
- enqueue(sec.nextInSectionGroup, 0);
+ enqueue(sec.nextInSectionGroup, 0, nullptr, &sec);
+ }
+
+ if (!ctx.arg.whyLive.empty()) {
+ for (Symbol *sym : ctx.symtab->getSymbols()) {
+ if (llvm::any_of(ctx.arg.whyLive, [sym](const llvm::GlobPattern &pat) {
+ return pat.match(sym->getName());
+ }))
+ printWhyLive(sym);
+ }
}
}
@@ -353,7 +452,7 @@ template <class ELFT> void MarkLive<ELFT>::moveToMain() {
continue;
if (ctx.symtab->find(("__start_" + sec->name).str()) ||
ctx.symtab->find(("__stop_" + sec->name).str()))
- enqueue(sec, 0);
+ enqueue(sec);
}
mark();
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c31875305952f..babc84f345b95 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -559,6 +559,12 @@ defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and "
"__real_symbol references to symbol">,
MetaVarName<"<symbol>">;
+defm why_live
+ : EEq<"why-live",
+ "Report a chain of references preventing garbage collection for "
+ "each symbol matching <glob>">,
+ MetaVarName<"<glob>">;
+
def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
HelpText<"Linker option extensions">;
diff --git a/lld/test/ELF/why-live.s b/lld/test/ELF/why-live.s
new file mode 100644
index 0000000000000..12d373cd78d28
--- /dev/null
+++ b/lld/test/ELF/why-live.s
@@ -0,0 +1,132 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -n -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: echo -e ".globl test_shared\n .section .test_shared,\"ax\", at progbits\n test_shared: jmp test_shared" |\
+# RUN: llvm-mc -n -filetype=obj -triple=x86_64 -o %t.shared.o
+# RUN: ld.lld -shared %t.shared.o -o %t.so
+
+## Simple live section
+.globl _start
+.section ._start,"ax", at progbits
+_start:
+jmp test_simple
+jmp .Llocal
+jmp .Llocal_within_symbol
+jmp test_shared
+.size _start, .-_start
+
+.globl test_simple
+.section .test_simple,"ax", at progbits
+test_simple:
+jmp test_simple
+jmp test_from_unsized
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_simple | FileCheck %s --check-prefix=SIMPLE
+
+# SIMPLE: live symbol: test_simple
+# SIMPLE-NEXT: >>> kept live by _start
+
+## Live only by being a member of .test_simple
+.globl test_incidental
+test_incidental:
+jmp test_incidental
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_incidental | FileCheck %s --check-prefix=INCIDENTAL
+
+# INCIDENTAL: live symbol: test_incidental
+# INCIDENTAL-NEXT: >>> kept live by {{.*}}.o:(.test_simple)
+# INCIDENTAL-NEXT: >>> kept live by test_simple
+# INCIDENTAL-NEXT: >>> kept live by _start
+
+## Reached from a reference in section .test_simple directly, since test_simple is an unsized symbol.
+.globl test_from_unsized
+.section .test_from_unsized,"ax", at progbits
+test_from_unsized:
+jmp test_from_unsized
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_from_unsized | FileCheck %s --check-prefix=FROM-UNSIZED
+
+# FROM-UNSIZED: live symbol: test_from_unsized
+# FROM-UNSIZED-NEXT: >>> kept live by {{.*}}.o:(.test_simple)
+# FROM-UNSIZED-NEXT: >>> kept live by test_simple
+# FROM-UNSIZED-NEXT: >>> kept live by _start
+
+## Symbols in dead sections are dead and not reported.
+.globl test_dead
+.section .test_dead,"ax", at progbits
+test_dead:
+jmp test_dead
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_dead | count 0
+
+## Undefined symbols are considered live, since they are not in dead sections.
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_undef -u test_undef | FileCheck %s --check-prefix=UNDEFINED
+
+# UNDEFINED: live symbol: test_undef
+# UNDEFINED-NOT: >>>
+
+## Defined symbols without input section parents are live.
+.globl test_absolute
+test_absolute = 1234
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_absolute | FileCheck %s --check-prefix=ABSOLUTE
+
+# ABSOLUTE: live symbol: test_absolute
+# ABSOLUTE-NOT: >>>
+
+## Retained sections are intrinsically live, and they make contained symbols live.
+.globl test_retained
+.section .test_retained,"axR", at progbits
+test_retained:
+jmp test_retained
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_retained | FileCheck %s --check-prefix=RETAINED
+
+# RETAINED: live symbol: test_retained
+# RETAINED-NEXT: >>> kept live by {{.*}}:(.test_retained)
+
+## Relocs that reference offsets from sections (e.g., from local symbols) are considered to point to the section if no enclosing symbol exists.
+
+.globl test_section_offset
+.section .test_section_offset,"ax", at progbits
+test_section_offset:
+jmp test_section_offset
+.Llocal:
+jmp test_section_offset
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_section_offset | FileCheck %s --check-prefix=SECTION-OFFSET
+
+# SECTION-OFFSET: live symbol: test_section_offset
+# SECTION-OFFSET-NEXT: >>> kept live by {{.*}}:(.test_section_offset)
+# SECTION-OFFSET-NEXT: >>> kept live by _start
+
+## Relocs that reference offsets from sections (e.g., from local symbols) are considered to point to the enclosing symbol if one exists.
+
+.globl test_section_offset_within_symbol
+.section .test_section_offset_within_symbol,"ax", at progbits
+test_section_offset_within_symbol:
+jmp test_section_offset_within_symbol
+.Llocal_within_symbol:
+jmp test_section_offset_within_symbol
+.size test_section_offset_within_symbol, .-test_section_offset_within_symbol
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_section_offset_within_symbol | FileCheck %s --check-prefix=SECTION-OFFSET-WITHIN-SYMBOL
+
+# SECTION-OFFSET-WITHIN-SYMBOL: live symbol: test_section_offset_within_symbol
+# SECTION-OFFSET-WITHIN-SYMBOL-NEXT: >>> kept live by _start
+
+## Shared symbols
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections %t.so --why-live=test_shared | FileCheck %s --check-prefix=SHARED
+
+# SHARED: live symbol: test_shared
+# SHARED-NEXT: >>> kept live by _start
+
+## Globs match multiple cases. Multiple --why-live flags union.
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections %t.so --why-live=test_s* | FileCheck %s --check-prefix=MULTIPLE
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections %t.so --why-live=test_simple --why-live=test_shared | FileCheck %s --check-prefix=MULTIPLE
+
+# MULTIPLE-DAG: live symbol: test_simple
+# MULTIPLE-DAG: live symbol: test_shared
More information about the llvm-commits
mailing list