[lld] [lld][ELF] Add --why-live flag (inspired by Mach-O) (PR #127112)

Daniel Thornburgh via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 13 11:23:03 PST 2025


https://github.com/mysterymath updated https://github.com/llvm/llvm-project/pull/127112

>From 0f1b04312090e4ac35f2adbd5f789e9c19af8929 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 13 Nov 2024 15:12:55 -0800
Subject: [PATCH] [lld][ELF] Add --why-live flag (inspired by Mach-O)

This prints the stack of reasons that symbols that match the given
glob(s) survived GC. It has no effect unless section GC occurs.

A symbol may be live intrinsically, because it is referenced by another
symbol or section, or because it is part of a live section. Sections have
similar reasons.

This implementation does not require -ffunction-sections or
-fdata-sections to produce readable results, although it does tend to
work better with them (as does GC).
---
 lld/ELF/Config.h        |   1 +
 lld/ELF/Driver.cpp      |   9 +++
 lld/ELF/MarkLive.cpp    | 125 +++++++++++++++++++++++++++++++++----
 lld/ELF/Options.td      |   6 ++
 lld/test/ELF/why-live.s | 132 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 260 insertions(+), 13 deletions(-)
 create mode 100644 lld/test/ELF/why-live.s

diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index b2859486d58e9..12164f5999343 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -223,6 +223,7 @@ struct Config {
   llvm::StringRef thinLTOCacheDir;
   llvm::StringRef thinLTOIndexOnlyArg;
   llvm::StringRef whyExtract;
+  llvm::SmallVector<llvm::GlobPattern, 0> whyLive;
   llvm::StringRef cmseInputLib;
   llvm::StringRef cmseOutputLib;
   StringRef zBtiReport = "none";
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 13e8f8ce6df20..db0b2ea8afcf0 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1472,6 +1472,15 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
   ctx.arg.warnSymbolOrdering =
       args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
   ctx.arg.whyExtract = args.getLastArgValue(OPT_why_extract);
+  // Compile each --why-live value into a glob pattern. A malformed glob is
+  // reported as an error and skipped; the remaining patterns are still used.
+  for (opt::Arg *arg : args.filtered(OPT_why_live)) {
+    StringRef value(arg->getValue());
+    if (Expected<GlobPattern> pat = GlobPattern::create(value))
+      ctx.arg.whyLive.emplace_back(std::move(*pat));
+    else
+      ErrAlways(ctx) << arg->getSpelling() << ": " << pat.takeError();
+  }
   ctx.arg.zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
   ctx.arg.zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
   ctx.arg.zForceBti = hasZOption(args, "force-bti");
diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index b6c22884d9176..8e9e385bc26dc 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -29,9 +29,11 @@
 #include "Target.h"
 #include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
+#include "llvm/ADT/DenseMapInfoVariant.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Object/ELF.h"
 #include "llvm/Support/TimeProfiler.h"
+#include <variant>
 #include <vector>
 
 using namespace llvm;
@@ -42,6 +44,10 @@ using namespace lld;
 using namespace lld::elf;
 
 namespace {
+
+// An item (input section or symbol) that directly keeps another item alive.
+using LiveReason = std::variant<InputSectionBase *, Symbol *>;
+
 template <class ELFT> class MarkLive {
 public:
   MarkLive(Ctx &ctx, unsigned partition) : ctx(ctx), partition(partition) {}
@@ -50,7 +56,10 @@ template <class ELFT> class MarkLive {
   void moveToMain();
 
 private:
-  void enqueue(InputSectionBase *sec, uint64_t offset);
+  void enqueue(InputSectionBase *sec, uint64_t offset = 0,
+               Symbol *sym = nullptr,
+               std::optional<LiveReason> reason = std::nullopt);
+  void printWhyLive(Symbol *s) const;
   void markSymbol(Symbol *sym);
   void mark();
 
@@ -70,6 +79,12 @@ template <class ELFT> class MarkLive {
   // There are normally few input sections whose names are valid C
   // identifiers, so we just store a SmallVector instead of a multimap.
   DenseMap<StringRef, SmallVector<InputSectionBase *, 0>> cNamedSections;
+
+  // The most proximate reason that something is live. If something doesn't have
+  // a recorded reason, it is either dead, intrinsically live, or an
+  // unreferenced symbol in a live section. (These cases are trivially
+  // detectable and need not be stored.)
+  DenseMap<LiveReason, LiveReason> whyLive;
 };
 } // namespace
 
@@ -101,6 +116,12 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
   Symbol &sym = sec.file->getRelocTargetSym(rel);
   sym.used = true;
 
+  LiveReason reason;
+  if (!ctx.arg.whyLive.empty()) {
+    Defined *reasonSym = sec.getEnclosingSymbol(rel.r_offset);
+    reason = reasonSym ? LiveReason(reasonSym) : LiveReason(&sec);
+  }
+
   if (auto *d = dyn_cast<Defined>(&sym)) {
     auto *relSec = dyn_cast_or_null<InputSectionBase>(d->section);
     if (!relSec)
@@ -119,17 +140,29 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
     // group/SHF_LINK_ORDER rules (b) if the associated text section should be
     // discarded, marking the LSDA will unnecessarily retain the text section.
     if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) ||
-                      relSec->nextInSectionGroup)))
-      enqueue(relSec, offset);
+                      relSec->nextInSectionGroup))) {
+      Symbol *canonicalSym = d;
+      if (!ctx.arg.whyLive.empty() && d->isSection()) {
+        if (Symbol *s = relSec->getEnclosingSymbol(offset))
+          canonicalSym = s;
+        else
+          canonicalSym = nullptr;
+      }
+      enqueue(relSec, offset, canonicalSym, reason);
+    }
     return;
   }
 
-  if (auto *ss = dyn_cast<SharedSymbol>(&sym))
-    if (!ss->isWeak())
+  if (auto *ss = dyn_cast<SharedSymbol>(&sym)) {
+    if (!ss->isWeak()) {
       cast<SharedFile>(ss->file)->isNeeded = true;
+      if (!ctx.arg.whyLive.empty())
+        whyLive.try_emplace(&sym, reason);
+    }
+  }
 
   for (InputSectionBase *sec : cNamedSections.lookup(sym.getName()))
-    enqueue(sec, 0);
+    enqueue(sec, 0, nullptr, reason);
 }
 
 // The .eh_frame section is an unfortunate special case.
@@ -187,7 +220,8 @@ static bool isReserved(InputSectionBase *sec) {
 }
 
 template <class ELFT>
-void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) {
+void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
+                             Symbol *sym, std::optional<LiveReason> reason) {
   // Usually, a whole section is marked as live or dead, but in mergeable
   // (splittable) sections, each piece of data has independent liveness bit.
   // So we explicitly tell it which offset is in use.
@@ -201,15 +235,71 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) {
     return;
   sec->partition = sec->partition ? 1 : partition;
 
+  if (!ctx.arg.whyLive.empty() && reason) {
+    if (sym) {
+      // If a specific symbol is referenced, that makes it alive. It may in turn
+      // make its section alive.
+      whyLive.try_emplace(sym, *reason);
+      whyLive.try_emplace(sec, sym);
+    } else {
+      // Otherwise, the reference generically makes the section live.
+      whyLive.try_emplace(sec, *reason);
+    }
+  }
+
   // Add input section to the queue.
   if (InputSection *s = dyn_cast<InputSection>(sec))
     queue.push_back(s);
 }
 
+// Print the chain of reasons that kept symbol `s` alive: the symbol itself,
+// then each successive item that kept the previous one alive, stopping at the
+// first item with no recorded or implied reason. Dead symbols print nothing.
+template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
+  // A defined symbol whose containing input section is dead is itself dead,
+  // so there is nothing to report for it.
+  if (auto *def = dyn_cast<Defined>(s))
+    if (auto *owner = dyn_cast_or_null<InputSectionBase>(def->section))
+      if (!owner->isLive())
+        return;
+
+  auto msg = Msg(ctx);
+  msg << "live symbol: " << toStr(ctx, *s);
+
+  // For an item without a recorded reason: a defined symbol is kept alive by
+  // its containing input section. Returns that section, or nullptr when the
+  // item is a section, a non-defined symbol, or a symbol with no input section.
+  auto containingSection = [](LiveReason item) -> InputSectionBase * {
+    if (!std::holds_alternative<Symbol *>(item))
+      return nullptr;
+    Symbol *sym = std::get<Symbol *>(item);
+    auto *def = dyn_cast<Defined>(sym);
+    return def ? dyn_cast_or_null<InputSectionBase>(def->section) : nullptr;
+  };
+
+  for (LiveReason cur = s;;) {
+    // Prefer the explicitly recorded reason; otherwise fall back to the
+    // containing section. With neither, the chain ends here.
+    if (auto it = whyLive.find(cur); it != whyLive.end()) {
+      cur = it->second;
+    } else if (InputSectionBase *owner = containingSection(cur)) {
+      cur = LiveReason{owner};
+    } else {
+      return;
+    }
+
+    msg << "\n>>> kept live by ";
+    if (auto *sym = std::get_if<Symbol *>(&cur))
+      msg << toStr(ctx, **sym);
+    else
+      msg << toStr(ctx, std::get<InputSectionBase *>(cur));
+  }
+}
+
 template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {
   if (auto *d = dyn_cast_or_null<Defined>(sym))
     if (auto *isec = dyn_cast_or_null<InputSectionBase>(d->section))
-      enqueue(isec, d->value);
+      enqueue(isec, d->value, sym);
 }
 
 // This is the main function of the garbage collector.
@@ -256,7 +346,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
   }
   for (InputSectionBase *sec : ctx.inputSections) {
     if (sec->flags & SHF_GNU_RETAIN) {
-      enqueue(sec, 0);
+      enqueue(sec, 0, nullptr, std::nullopt);
       continue;
     }
     if (sec->flags & SHF_LINK_ORDER)
@@ -295,7 +385,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
     // Preserve special sections and those which are specified in linker
     // script KEEP command.
     if (isReserved(sec) || ctx.script->shouldKeep(sec)) {
-      enqueue(sec, 0);
+      enqueue(sec);
     } else if ((!ctx.arg.zStartStopGC || sec->name.starts_with("__libc_")) &&
                isValidCIdentifier(sec->name)) {
       // As a workaround for glibc libc.a before 2.34
@@ -323,11 +413,20 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
       resolveReloc(sec, rel, false);
 
     for (InputSectionBase *isec : sec.dependentSections)
-      enqueue(isec, 0);
+      enqueue(isec, 0, nullptr, &sec);
 
     // Mark the next group member.
     if (sec.nextInSectionGroup)
-      enqueue(sec.nextInSectionGroup, 0);
+      enqueue(sec.nextInSectionGroup, 0, nullptr, &sec);
+  }
+
+  if (!ctx.arg.whyLive.empty()) {
+    for (Symbol *sym : ctx.symtab->getSymbols()) {
+      if (llvm::any_of(ctx.arg.whyLive, [sym](const llvm::GlobPattern &pat) {
+            return pat.match(sym->getName());
+          }))
+        printWhyLive(sym);
+    }
   }
 }
 
@@ -353,7 +452,7 @@ template <class ELFT> void MarkLive<ELFT>::moveToMain() {
       continue;
     if (ctx.symtab->find(("__start_" + sec->name).str()) ||
         ctx.symtab->find(("__stop_" + sec->name).str()))
-      enqueue(sec, 0);
+      enqueue(sec);
   }
 
   mark();
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c31875305952f..babc84f345b95 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -559,6 +559,12 @@ defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and "
                        "__real_symbol references to symbol">,
             MetaVarName<"<symbol>">;
 
+defm why_live
+    : EEq<"why-live",
+          "Report a chain of references preventing garbage collection for "
+          "each symbol matching <glob>">,
+      MetaVarName<"<glob>">;
+
 def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
   HelpText<"Linker option extensions">;
 
diff --git a/lld/test/ELF/why-live.s b/lld/test/ELF/why-live.s
new file mode 100644
index 0000000000000..12d373cd78d28
--- /dev/null
+++ b/lld/test/ELF/why-live.s
@@ -0,0 +1,132 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -n -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: echo -e ".globl test_shared\n .section .test_shared,\"ax\",@progbits\n test_shared: jmp test_shared" |\
+# RUN:   llvm-mc -n -filetype=obj -triple=x86_64 -o %t.shared.o
+# RUN: ld.lld -shared %t.shared.o -o %t.so
+
+## Simple live section
+.globl _start
+.section ._start,"ax",@progbits
+_start:
+jmp test_simple
+jmp .Llocal
+jmp .Llocal_within_symbol
+jmp test_shared
+.size _start, .-_start
+
+.globl test_simple
+.section .test_simple,"ax",@progbits
+test_simple:
+jmp test_simple
+jmp test_from_unsized
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_simple | FileCheck %s --check-prefix=SIMPLE
+
+# SIMPLE:      live symbol: test_simple
+# SIMPLE-NEXT: >>> kept live by _start
+
+## Live only by being a member of .test_simple
+.globl test_incidental
+test_incidental:
+jmp test_incidental
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_incidental | FileCheck %s --check-prefix=INCIDENTAL
+
+# INCIDENTAL:      live symbol: test_incidental
+# INCIDENTAL-NEXT: >>> kept live by {{.*}}.o:(.test_simple)
+# INCIDENTAL-NEXT: >>> kept live by test_simple
+# INCIDENTAL-NEXT: >>> kept live by _start
+
+## Reached from a reference in section .test_simple directly, since test_simple is an unsized symbol.
+.globl test_from_unsized
+.section .test_from_unsized,"ax",@progbits
+test_from_unsized:
+jmp test_from_unsized
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_from_unsized | FileCheck %s --check-prefix=FROM-UNSIZED
+
+# FROM-UNSIZED:      live symbol: test_from_unsized
+# FROM-UNSIZED-NEXT: >>> kept live by {{.*}}.o:(.test_simple)
+# FROM-UNSIZED-NEXT: >>> kept live by test_simple
+# FROM-UNSIZED-NEXT: >>> kept live by _start
+
+## Symbols in dead sections are dead and not reported.
+.globl test_dead
+.section .test_dead,"ax",@progbits
+test_dead:
+jmp test_dead
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_dead | count 0
+
+## Undefined symbols are considered live, since they are not in dead sections.
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_undef -u test_undef | FileCheck %s --check-prefix=UNDEFINED
+
+# UNDEFINED:     live symbol: test_undef
+# UNDEFINED-NOT: >>>
+
+## Defined symbols without input section parents are live.
+.globl test_absolute
+test_absolute = 1234
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_absolute | FileCheck %s --check-prefix=ABSOLUTE
+
+# ABSOLUTE:     live symbol: test_absolute
+# ABSOLUTE-NOT: >>>
+
+## Retained sections are intrinsically live, and they make contained symbols live.
+.globl test_retained
+.section .test_retained,"axR",@progbits
+test_retained:
+jmp test_retained
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_retained | FileCheck %s --check-prefix=RETAINED
+
+# RETAINED:      live symbol: test_retained
+# RETAINED-NEXT: >>> kept live by {{.*}}:(.test_retained)
+
+## Relocs that reference offsets from sections (e.g., from local symbols) are considered to point to the section if no enclosing symbol exists.
+
+.globl test_section_offset
+.section .test_section_offset,"ax",@progbits
+test_section_offset:
+jmp test_section_offset
+.Llocal:
+jmp test_section_offset
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_section_offset | FileCheck %s --check-prefix=SECTION-OFFSET
+
+# SECTION-OFFSET:        live symbol: test_section_offset
+# SECTION-OFFSET-NEXT:   >>> kept live by {{.*}}:(.test_section_offset)
+# SECTION-OFFSET-NEXT:   >>> kept live by _start
+
+## Relocs that reference offsets from sections (e.g., from local symbols) are considered to point to the enclosing symbol if one exists.
+
+.globl test_section_offset_within_symbol
+.section .test_section_offset_within_symbol,"ax",@progbits
+test_section_offset_within_symbol:
+jmp test_section_offset_within_symbol
+.Llocal_within_symbol:
+jmp test_section_offset_within_symbol
+.size test_section_offset_within_symbol, .-test_section_offset_within_symbol
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_section_offset_within_symbol | FileCheck %s --check-prefix=SECTION-OFFSET-WITHIN-SYMBOL
+
+# SECTION-OFFSET-WITHIN-SYMBOL:        live symbol: test_section_offset_within_symbol
+# SECTION-OFFSET-WITHIN-SYMBOL-NEXT:   >>> kept live by _start
+
+## Shared symbols
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections %t.so --why-live=test_shared | FileCheck %s --check-prefix=SHARED
+
+# SHARED:      live symbol: test_shared
+# SHARED-NEXT: >>> kept live by _start
+
+## Globs match multiple cases. Multiple --why-live flags union.
+
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections %t.so --why-live=test_s* | FileCheck %s --check-prefix=MULTIPLE
+# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections %t.so --why-live=test_simple --why-live=test_shared | FileCheck %s --check-prefix=MULTIPLE
+
+# MULTIPLE-DAG: live symbol: test_simple
+# MULTIPLE-DAG: live symbol: test_shared



More information about the llvm-commits mailing list