[lld] [LLD][ELF] Add --why-live flag to report GC liveness reason (PR #119279)

Daniel Thornburgh via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 29 16:17:22 PST 2025


https://github.com/mysterymath updated https://github.com/llvm/llvm-project/pull/119279

>From 802fc57ddd4291404b009aa7e99d3e7bf83ecd74 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 13 Nov 2024 15:12:55 -0800
Subject: [PATCH 01/30] Pass through parent enqueue section and offset

---
 lld/ELF/MarkLive.cpp | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index b6c22884d91769..04caf531720e4a 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -42,6 +42,11 @@ using namespace lld;
 using namespace lld::elf;
 
 namespace {
+struct LiveParent {
+  InputSectionBase *sec;
+  std::optional<uint64_t> offset;
+};
+
 template <class ELFT> class MarkLive {
 public:
   MarkLive(Ctx &ctx, unsigned partition) : ctx(ctx), partition(partition) {}
@@ -50,7 +55,7 @@ template <class ELFT> class MarkLive {
   void moveToMain();
 
 private:
-  void enqueue(InputSectionBase *sec, uint64_t offset);
+  void enqueue(InputSectionBase *sec, uint64_t offset, std::optional<LiveParent> parent);
   void markSymbol(Symbol *sym);
   void mark();
 
@@ -101,6 +106,8 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
   Symbol &sym = sec.file->getRelocTargetSym(rel);
   sym.used = true;
 
+  LiveParent parent = {&sec, rel.r_offset};
+
   if (auto *d = dyn_cast<Defined>(&sym)) {
     auto *relSec = dyn_cast_or_null<InputSectionBase>(d->section);
     if (!relSec)
@@ -120,7 +127,7 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
     // discarded, marking the LSDA will unnecessarily retain the text section.
     if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) ||
                       relSec->nextInSectionGroup)))
-      enqueue(relSec, offset);
+      enqueue(relSec, offset, parent);
     return;
   }
 
@@ -129,7 +136,7 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
       cast<SharedFile>(ss->file)->isNeeded = true;
 
   for (InputSectionBase *sec : cNamedSections.lookup(sym.getName()))
-    enqueue(sec, 0);
+    enqueue(sec, 0, parent);
 }
 
 // The .eh_frame section is an unfortunate special case.
@@ -187,7 +194,8 @@ static bool isReserved(InputSectionBase *sec) {
 }
 
 template <class ELFT>
-void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) {
+void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
+                             std::optional<LiveParent> parent) {
   // Usually, a whole section is marked as live or dead, but in mergeable
   // (splittable) sections, each piece of data has independent liveness bit.
   // So we explicitly tell it which offset is in use.
@@ -209,7 +217,7 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) {
 template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {
   if (auto *d = dyn_cast_or_null<Defined>(sym))
     if (auto *isec = dyn_cast_or_null<InputSectionBase>(d->section))
-      enqueue(isec, d->value);
+      enqueue(isec, d->value, std::nullopt);
 }
 
 // This is the main function of the garbage collector.
@@ -256,7 +264,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
   }
   for (InputSectionBase *sec : ctx.inputSections) {
     if (sec->flags & SHF_GNU_RETAIN) {
-      enqueue(sec, 0);
+      enqueue(sec, 0, std::nullopt);
       continue;
     }
     if (sec->flags & SHF_LINK_ORDER)
@@ -295,7 +303,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
     // Preserve special sections and those which are specified in linker
     // script KEEP command.
     if (isReserved(sec) || ctx.script->shouldKeep(sec)) {
-      enqueue(sec, 0);
+      enqueue(sec, 0, std::nullopt);
     } else if ((!ctx.arg.zStartStopGC || sec->name.starts_with("__libc_")) &&
                isValidCIdentifier(sec->name)) {
       // As a workaround for glibc libc.a before 2.34
@@ -323,11 +331,11 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
       resolveReloc(sec, rel, false);
 
     for (InputSectionBase *isec : sec.dependentSections)
-      enqueue(isec, 0);
+      enqueue(isec, 0, LiveParent{&sec, std::nullopt});
 
     // Mark the next group member.
     if (sec.nextInSectionGroup)
-      enqueue(sec.nextInSectionGroup, 0);
+      enqueue(sec.nextInSectionGroup, 0, LiveParent{&sec, std::nullopt});
   }
 }
 
@@ -353,7 +361,7 @@ template <class ELFT> void MarkLive<ELFT>::moveToMain() {
       continue;
     if (ctx.symtab->find(("__start_" + sec->name).str()) ||
         ctx.symtab->find(("__stop_" + sec->name).str()))
-      enqueue(sec, 0);
+      enqueue(sec, 0, std::nullopt);
   }
 
   mark();

>From 6d376d5d528ea1151ee3235f403249f45adb0b24 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 27 Nov 2024 16:07:13 -0800
Subject: [PATCH 02/30] Track parents

---
 lld/ELF/MarkLive.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 04caf531720e4a..59603e61f4f3cb 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -75,6 +75,8 @@ template <class ELFT> class MarkLive {
   // There are normally few input sections whose names are valid C
   // identifiers, so we just store a SmallVector instead of a multimap.
   DenseMap<StringRef, SmallVector<InputSectionBase *, 0>> cNamedSections;
+
+  DenseMap<InputSectionBase*, LiveParent> parents;
 };
 } // namespace
 
@@ -209,6 +211,9 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
     return;
   sec->partition = sec->partition ? 1 : partition;
 
+  if (parent)
+    parents.try_emplace(sec, *parent);
+
   // Add input section to the queue.
   if (InputSection *s = dyn_cast<InputSection>(sec))
     queue.push_back(s);

>From 5239cdf819ac98cbf879b1c487c2e6ec4e2e59a8 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Mon, 2 Dec 2024 15:51:10 -0800
Subject: [PATCH 03/30] Recast as LiveOffset

---
 lld/ELF/MarkLive.cpp | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 59603e61f4f3cb..cd2bc456ea2037 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -42,9 +42,13 @@ using namespace lld;
 using namespace lld::elf;
 
 namespace {
-struct LiveParent {
+struct LiveOffset {
   InputSectionBase *sec;
   std::optional<uint64_t> offset;
+
+  LiveOffset(InputSectionBase *sec,
+             std::optional<uint64_t> offset = std::nullopt)
+      : sec(sec), offset(offset) {}
 };
 
 template <class ELFT> class MarkLive {
@@ -55,7 +59,8 @@ template <class ELFT> class MarkLive {
   void moveToMain();
 
 private:
-  void enqueue(InputSectionBase *sec, uint64_t offset, std::optional<LiveParent> parent);
+  void enqueue(InputSectionBase *sec, uint64_t offset,
+               std::optional<LiveOffset> parent);
   void markSymbol(Symbol *sym);
   void mark();
 
@@ -76,7 +81,7 @@ template <class ELFT> class MarkLive {
   // identifiers, so we just store a SmallVector instead of a multimap.
   DenseMap<StringRef, SmallVector<InputSectionBase *, 0>> cNamedSections;
 
-  DenseMap<InputSectionBase*, LiveParent> parents;
+  DenseMap<LiveOffset, LiveOffset> whyLive;
 };
 } // namespace
 
@@ -108,7 +113,7 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
   Symbol &sym = sec.file->getRelocTargetSym(rel);
   sym.used = true;
 
-  LiveParent parent = {&sec, rel.r_offset};
+  LiveOffset parent = {&sec, rel.r_offset};
 
   if (auto *d = dyn_cast<Defined>(&sym)) {
     auto *relSec = dyn_cast_or_null<InputSectionBase>(d->section);
@@ -197,7 +202,7 @@ static bool isReserved(InputSectionBase *sec) {
 
 template <class ELFT>
 void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
-                             std::optional<LiveParent> parent) {
+                             std::optional<LiveOffset> parent) {
   // Usually, a whole section is marked as live or dead, but in mergeable
   // (splittable) sections, each piece of data has independent liveness bit.
   // So we explicitly tell it which offset is in use.
@@ -212,7 +217,7 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
   sec->partition = sec->partition ? 1 : partition;
 
   if (parent)
-    parents.try_emplace(sec, *parent);
+    whyLive.try_emplace(LiveOffset{sec, offset}, *parent);
 
   // Add input section to the queue.
   if (InputSection *s = dyn_cast<InputSection>(sec))
@@ -336,11 +341,11 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
       resolveReloc(sec, rel, false);
 
     for (InputSectionBase *isec : sec.dependentSections)
-      enqueue(isec, 0, LiveParent{&sec, std::nullopt});
+      enqueue(isec, 0, &sec);
 
     // Mark the next group member.
     if (sec.nextInSectionGroup)
-      enqueue(sec.nextInSectionGroup, 0, LiveParent{&sec, std::nullopt});
+      enqueue(sec.nextInSectionGroup, 0, &sec);
   }
 }
 

>From d025c6976ff3d23cead2fe051770a7f9c3497258 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Mon, 2 Dec 2024 16:00:58 -0800
Subject: [PATCH 04/30] Use a pair

---
 lld/ELF/MarkLive.cpp | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index cd2bc456ea2037..90947fc1da2223 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -42,14 +42,7 @@ using namespace lld;
 using namespace lld::elf;
 
 namespace {
-struct LiveOffset {
-  InputSectionBase *sec;
-  std::optional<uint64_t> offset;
-
-  LiveOffset(InputSectionBase *sec,
-             std::optional<uint64_t> offset = std::nullopt)
-      : sec(sec), offset(offset) {}
-};
+typedef std::pair<InputSectionBase *, uint64_t> LiveOffset;
 
 template <class ELFT> class MarkLive {
 public:
@@ -341,11 +334,11 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
       resolveReloc(sec, rel, false);
 
     for (InputSectionBase *isec : sec.dependentSections)
-      enqueue(isec, 0, &sec);
+      enqueue(isec, 0, {{&sec, 0}});
 
     // Mark the next group member.
     if (sec.nextInSectionGroup)
-      enqueue(sec.nextInSectionGroup, 0, &sec);
+      enqueue(sec.nextInSectionGroup, 0, {{&sec, 0}});
   }
 }
 

>From 329c65d1489312726bb6ba966319fe8f531a76c0 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 3 Dec 2024 11:00:50 -0800
Subject: [PATCH 05/30] Also record against the zero offset

---
 lld/ELF/MarkLive.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 90947fc1da2223..869eb87a2c0219 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -209,8 +209,13 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
     return;
   sec->partition = sec->partition ? 1 : partition;
 
-  if (parent)
+  if (parent) {
     whyLive.try_emplace(LiveOffset{sec, offset}, *parent);
+    // Offset zero is treated as a stand-in for the section itself. The parent
+    // is both a specific reason that an offset within this section is alive and
+    // a generic reason the section itself is alive.
+    whyLive.try_emplace(LiveOffset{sec, 0}, *parent);
+  }
 
   // Add input section to the queue.
   if (InputSection *s = dyn_cast<InputSection>(sec))

>From 30be9ebf3ad148bc7805125f082605ba293fba9a Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 3 Dec 2024 11:10:44 -0800
Subject: [PATCH 06/30] Track live objects as either symbols or sections

---
 lld/ELF/MarkLive.cpp | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 869eb87a2c0219..f8cbdd62bf1545 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -29,9 +29,11 @@
 #include "Target.h"
 #include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
+#include "llvm/ADT/DenseMapInfoVariant.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Object/ELF.h"
 #include "llvm/Support/TimeProfiler.h"
+#include <variant>
 #include <vector>
 
 using namespace llvm;
@@ -42,7 +44,7 @@ using namespace lld;
 using namespace lld::elf;
 
 namespace {
-typedef std::pair<InputSectionBase *, uint64_t> LiveOffset;
+typedef std::variant<InputSectionBase *, Defined *> LiveObject;
 
 template <class ELFT> class MarkLive {
 public:
@@ -53,7 +55,7 @@ template <class ELFT> class MarkLive {
 
 private:
   void enqueue(InputSectionBase *sec, uint64_t offset,
-               std::optional<LiveOffset> parent);
+               std::optional<LiveObject> parent);
   void markSymbol(Symbol *sym);
   void mark();
 
@@ -74,7 +76,7 @@ template <class ELFT> class MarkLive {
   // identifiers, so we just store a SmallVector instead of a multimap.
   DenseMap<StringRef, SmallVector<InputSectionBase *, 0>> cNamedSections;
 
-  DenseMap<LiveOffset, LiveOffset> whyLive;
+  DenseMap<LiveObject, LiveObject> whyLive;
 };
 } // namespace
 
@@ -106,7 +108,8 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
   Symbol &sym = sec.file->getRelocTargetSym(rel);
   sym.used = true;
 
-  LiveOffset parent = {&sec, rel.r_offset};
+  Defined *parentSym = sec.getEnclosingSymbol(rel.r_offset);
+  auto parent = parentSym ? LiveObject(parentSym) : LiveObject(&sec);
 
   if (auto *d = dyn_cast<Defined>(&sym)) {
     auto *relSec = dyn_cast_or_null<InputSectionBase>(d->section);
@@ -195,7 +198,7 @@ static bool isReserved(InputSectionBase *sec) {
 
 template <class ELFT>
 void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
-                             std::optional<LiveOffset> parent) {
+                             std::optional<LiveObject> parent) {
   // Usually, a whole section is marked as live or dead, but in mergeable
   // (splittable) sections, each piece of data has independent liveness bit.
   // So we explicitly tell it which offset is in use.
@@ -210,11 +213,10 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
   sec->partition = sec->partition ? 1 : partition;
 
   if (parent) {
-    whyLive.try_emplace(LiveOffset{sec, offset}, *parent);
-    // Offset zero is treated as a stand-in for the section itself. The parent
-    // is both a specific reason that an offset within this section is alive and
-    // a generic reason the section itself is alive.
-    whyLive.try_emplace(LiveOffset{sec, 0}, *parent);
+    whyLive.try_emplace(sec, *parent);
+    Defined *sym = sec->getEnclosingSymbol(offset);
+    if (sym)
+      whyLive.try_emplace(sym, *parent);
   }
 
   // Add input section to the queue.
@@ -339,11 +341,11 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
       resolveReloc(sec, rel, false);
 
     for (InputSectionBase *isec : sec.dependentSections)
-      enqueue(isec, 0, {{&sec, 0}});
+      enqueue(isec, 0, &sec);
 
     // Mark the next group member.
     if (sec.nextInSectionGroup)
-      enqueue(sec.nextInSectionGroup, 0, {{&sec, 0}});
+      enqueue(sec.nextInSectionGroup, 0, &sec);
   }
 }
 

>From 427f6f5a0f737de6a65d44b5e9d1df44565eca5e Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 3 Dec 2024 11:26:04 -0800
Subject: [PATCH 07/30] Be clear about the zero offset vs section distinction

---
 lld/ELF/MarkLive.cpp | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index f8cbdd62bf1545..4c67efe74334f5 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -54,7 +54,7 @@ template <class ELFT> class MarkLive {
   void moveToMain();
 
 private:
-  void enqueue(InputSectionBase *sec, uint64_t offset,
+  void enqueue(InputSectionBase *sec, std::optional<uint64_t> offset,
                std::optional<LiveObject> parent);
   void markSymbol(Symbol *sym);
   void mark();
@@ -139,7 +139,7 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
       cast<SharedFile>(ss->file)->isNeeded = true;
 
   for (InputSectionBase *sec : cNamedSections.lookup(sym.getName()))
-    enqueue(sec, 0, parent);
+    enqueue(sec, std::nullopt, parent);
 }
 
 // The .eh_frame section is an unfortunate special case.
@@ -197,13 +197,14 @@ static bool isReserved(InputSectionBase *sec) {
 }
 
 template <class ELFT>
-void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
+void MarkLive<ELFT>::enqueue(InputSectionBase *sec,
+                             std::optional<uint64_t> offset,
                              std::optional<LiveObject> parent) {
   // Usually, a whole section is marked as live or dead, but in mergeable
   // (splittable) sections, each piece of data has independent liveness bit.
   // So we explicitly tell it which offset is in use.
   if (auto *ms = dyn_cast<MergeInputSection>(sec))
-    ms->getSectionPiece(offset).live = true;
+    ms->getSectionPiece(offset.value_or(0)).live = true;
 
   // Set Sec->Partition to the meet (i.e. the "minimum") of Partition and
   // Sec->Partition in the following lattice: 1 < other < 0. If Sec->Partition
@@ -214,9 +215,11 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
 
   if (parent) {
     whyLive.try_emplace(sec, *parent);
-    Defined *sym = sec->getEnclosingSymbol(offset);
-    if (sym)
-      whyLive.try_emplace(sym, *parent);
+    if (offset) {
+      Defined *sym = sec->getEnclosingSymbol(*offset);
+      if (sym)
+        whyLive.try_emplace(sym, *parent);
+    }
   }
 
   // Add input section to the queue.
@@ -274,7 +277,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
   }
   for (InputSectionBase *sec : ctx.inputSections) {
     if (sec->flags & SHF_GNU_RETAIN) {
-      enqueue(sec, 0, std::nullopt);
+      enqueue(sec, std::nullopt, std::nullopt);
       continue;
     }
     if (sec->flags & SHF_LINK_ORDER)
@@ -313,7 +316,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
     // Preserve special sections and those which are specified in linker
     // script KEEP command.
     if (isReserved(sec) || ctx.script->shouldKeep(sec)) {
-      enqueue(sec, 0, std::nullopt);
+      enqueue(sec, std::nullopt, std::nullopt);
     } else if ((!ctx.arg.zStartStopGC || sec->name.starts_with("__libc_")) &&
                isValidCIdentifier(sec->name)) {
       // As a workaround for glibc libc.a before 2.34
@@ -341,11 +344,11 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
       resolveReloc(sec, rel, false);
 
     for (InputSectionBase *isec : sec.dependentSections)
-      enqueue(isec, 0, &sec);
+      enqueue(isec, std::nullopt, &sec);
 
     // Mark the next group member.
     if (sec.nextInSectionGroup)
-      enqueue(sec.nextInSectionGroup, 0, &sec);
+      enqueue(sec.nextInSectionGroup, std::nullopt, &sec);
   }
 }
 
@@ -371,7 +374,7 @@ template <class ELFT> void MarkLive<ELFT>::moveToMain() {
       continue;
     if (ctx.symtab->find(("__start_" + sec->name).str()) ||
         ctx.symtab->find(("__stop_" + sec->name).str()))
-      enqueue(sec, 0, std::nullopt);
+      enqueue(sec, std::nullopt, std::nullopt);
   }
 
   mark();

>From b9378f5a7e176926f49e44ef684a0218e77cc4e8 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 3 Dec 2024 11:37:59 -0800
Subject: [PATCH 08/30] Track shared symbols too

---
 lld/ELF/MarkLive.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 4c67efe74334f5..c0c6b69aab1d3d 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -44,7 +44,7 @@ using namespace lld;
 using namespace lld::elf;
 
 namespace {
-typedef std::variant<InputSectionBase *, Defined *> LiveObject;
+typedef std::variant<InputSectionBase *, Symbol *> LiveObject;
 
 template <class ELFT> class MarkLive {
 public:
@@ -134,9 +134,12 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
     return;
   }
 
-  if (auto *ss = dyn_cast<SharedSymbol>(&sym))
-    if (!ss->isWeak())
+  if (auto *ss = dyn_cast<SharedSymbol>(&sym)) {
+    if (!ss->isWeak()) {
       cast<SharedFile>(ss->file)->isNeeded = true;
+      whyLive.try_emplace(&sym, parent);
+    }
+  }
 
   for (InputSectionBase *sec : cNamedSections.lookup(sym.getName()))
     enqueue(sec, std::nullopt, parent);

>From 372760c321e6e751d698683dc6d09801edd86d8a Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 3 Dec 2024 14:49:45 -0800
Subject: [PATCH 09/30] Hax

---
 lld/ELF/MarkLive.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index c0c6b69aab1d3d..a0f7deec45166b 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -56,6 +56,7 @@ template <class ELFT> class MarkLive {
 private:
   void enqueue(InputSectionBase *sec, std::optional<uint64_t> offset,
                std::optional<LiveObject> parent);
+  void printWhyLive(const Symbol *s) const;
   void markSymbol(Symbol *sym);
   void mark();
 
@@ -230,6 +231,10 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec,
     queue.push_back(s);
 }
 
+template <class ELFT>
+void MarkLive<ELFT>::printWhyLive(const Symbol *s) const {
+}
+
 template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {
   if (auto *d = dyn_cast_or_null<Defined>(sym))
     if (auto *isec = dyn_cast_or_null<InputSectionBase>(d->section))
@@ -353,6 +358,8 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
     if (sec.nextInSectionGroup)
       enqueue(sec.nextInSectionGroup, std::nullopt, &sec);
   }
+
+  printWhyLive(ctx.symtab->find("foo"));
 }
 
 // Move the sections for some symbols to the main partition, specifically ifuncs

>From 33d45dabec5e48a9812435ac5da6ed72f9bba0e3 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 4 Dec 2024 14:11:31 -0800
Subject: [PATCH 10/30] Add simple why-live printing fn

---
 lld/ELF/MarkLive.cpp | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index a0f7deec45166b..a5549e93f6a7b7 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -56,7 +56,7 @@ template <class ELFT> class MarkLive {
 private:
   void enqueue(InputSectionBase *sec, std::optional<uint64_t> offset,
                std::optional<LiveObject> parent);
-  void printWhyLive(const Symbol *s) const;
+  void printWhyLive(Symbol *s) const;
   void markSymbol(Symbol *sym);
   void mark();
 
@@ -231,8 +231,29 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec,
     queue.push_back(s);
 }
 
-template <class ELFT>
-void MarkLive<ELFT>::printWhyLive(const Symbol *s) const {
+template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
+  std::string out = toString(*s) + " from " + toString(s->file);
+  int indent = 2;
+  LiveObject cur = s;
+  while (true) {
+    auto it = whyLive.find(cur);
+    if (it == whyLive.end())
+      if (auto *d = dyn_cast<Defined>(s))
+        it = whyLive.find(LiveObject{d->section});
+    if (it == whyLive.end())
+      break;
+    cur = it->second;
+    out += "\n" + std::string(indent, ' ');
+    if (std::holds_alternative<Symbol *>(cur)) {
+      auto *s = std::get<Symbol *>(cur);
+      out += toString(*s) + " from " + toString(s->file);
+    } else {
+      auto *s = std::get<InputSectionBase *>(cur);
+      // TODO: Fancy formatting
+      out += toString(s);
+    }
+  }
+  message(out);
 }
 
 template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {

>From b0829a0243c34c42aa8ed26a636bec6e8007ba94 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 4 Dec 2024 14:13:23 -0800
Subject: [PATCH 11/30] Add missing cast

---
 lld/ELF/MarkLive.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index a5549e93f6a7b7..a7510a361ed458 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -239,7 +239,8 @@ template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
     auto it = whyLive.find(cur);
     if (it == whyLive.end())
       if (auto *d = dyn_cast<Defined>(s))
-        it = whyLive.find(LiveObject{d->section});
+        if (auto *s = dyn_cast<InputSectionBase>(d->section))
+          it = whyLive.find(LiveObject{s});
     if (it == whyLive.end())
       break;
     cur = it->second;

>From c9599fa60ef58d1daebebb2c14c791d192af2965 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 4 Dec 2024 15:14:39 -0800
Subject: [PATCH 12/30] Explicitly mark roots as roots to break cycles

---
 lld/ELF/MarkLive.cpp | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index a7510a361ed458..5a57765dcfca02 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -54,8 +54,9 @@ template <class ELFT> class MarkLive {
   void moveToMain();
 
 private:
-  void enqueue(InputSectionBase *sec, std::optional<uint64_t> offset,
-               std::optional<LiveObject> parent);
+  void enqueue(InputSectionBase *sec,
+               std::optional<uint64_t> offset = std::nullopt,
+               std::optional<LiveObject> parent = std::nullopt);
   void printWhyLive(Symbol *s) const;
   void markSymbol(Symbol *sym);
   void mark();
@@ -77,7 +78,7 @@ template <class ELFT> class MarkLive {
   // identifiers, so we just store a SmallVector instead of a multimap.
   DenseMap<StringRef, SmallVector<InputSectionBase *, 0>> cNamedSections;
 
-  DenseMap<LiveObject, LiveObject> whyLive;
+  DenseMap<LiveObject, std::optional<LiveObject>> whyLive;
 };
 } // namespace
 
@@ -143,7 +144,7 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
   }
 
   for (InputSectionBase *sec : cNamedSections.lookup(sym.getName()))
-    enqueue(sec, std::nullopt, parent);
+    enqueue(sec);
 }
 
 // The .eh_frame section is an unfortunate special case.
@@ -217,13 +218,11 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec,
     return;
   sec->partition = sec->partition ? 1 : partition;
 
-  if (parent) {
-    whyLive.try_emplace(sec, *parent);
-    if (offset) {
-      Defined *sym = sec->getEnclosingSymbol(*offset);
-      if (sym)
-        whyLive.try_emplace(sym, *parent);
-    }
+  whyLive.try_emplace(sec, parent);
+  if (offset) {
+    Defined *sym = sec->getEnclosingSymbol(*offset);
+    if (sym)
+      whyLive.try_emplace(sym, parent);
   }
 
   // Add input section to the queue.
@@ -241,9 +240,11 @@ template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
       if (auto *d = dyn_cast<Defined>(s))
         if (auto *s = dyn_cast<InputSectionBase>(d->section))
           it = whyLive.find(LiveObject{s});
-    if (it == whyLive.end())
+    assert(it != whyLive.end() &&
+           "all live objects should have a tracked reason for being live");
+    if (!it->second)
       break;
-    cur = it->second;
+    cur = *it->second;
     out += "\n" + std::string(indent, ' ');
     if (std::holds_alternative<Symbol *>(cur)) {
       auto *s = std::get<Symbol *>(cur);
@@ -260,7 +261,7 @@ template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
 template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {
   if (auto *d = dyn_cast_or_null<Defined>(sym))
     if (auto *isec = dyn_cast_or_null<InputSectionBase>(d->section))
-      enqueue(isec, d->value, std::nullopt);
+      enqueue(isec, d->value);
 }
 
 // This is the main function of the garbage collector.
@@ -307,7 +308,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
   }
   for (InputSectionBase *sec : ctx.inputSections) {
     if (sec->flags & SHF_GNU_RETAIN) {
-      enqueue(sec, std::nullopt, std::nullopt);
+      enqueue(sec);
       continue;
     }
     if (sec->flags & SHF_LINK_ORDER)
@@ -346,7 +347,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
     // Preserve special sections and those which are specified in linker
     // script KEEP command.
     if (isReserved(sec) || ctx.script->shouldKeep(sec)) {
-      enqueue(sec, std::nullopt, std::nullopt);
+      enqueue(sec);
     } else if ((!ctx.arg.zStartStopGC || sec->name.starts_with("__libc_")) &&
                isValidCIdentifier(sec->name)) {
       // As a workaround for glibc libc.a before 2.34
@@ -406,7 +407,7 @@ template <class ELFT> void MarkLive<ELFT>::moveToMain() {
       continue;
     if (ctx.symtab->find(("__start_" + sec->name).str()) ||
         ctx.symtab->find(("__stop_" + sec->name).str()))
-      enqueue(sec, std::nullopt, std::nullopt);
+      enqueue(sec);
   }
 
   mark();

>From b127efb83b3e25204efd7b65d7cb9fad08896d42 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 4 Dec 2024 15:26:49 -0800
Subject: [PATCH 13/30] Find bar actually, not foo

---
 lld/ELF/MarkLive.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 5a57765dcfca02..10c572060da108 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -382,7 +382,7 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
       enqueue(sec.nextInSectionGroup, std::nullopt, &sec);
   }
 
-  printWhyLive(ctx.symtab->find("foo"));
+  printWhyLive(ctx.symtab->find("bar"));
 }
 
 // Move the sections for some symbols to the main partition, specifically ifuncs

>From 584dfe8b98afc29a45a3cc0b65b97db8d9fa6473 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 4 Dec 2024 15:38:53 -0800
Subject: [PATCH 14/30] Report section membership as a reason for being alive

---
 lld/ELF/MarkLive.cpp | 43 +++++++++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 10c572060da108..8287ce3f51af8f 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -231,29 +231,36 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec,
 }
 
 template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
-  std::string out = toString(*s) + " from " + toString(s->file);
-  int indent = 2;
-  LiveObject cur = s;
-  while (true) {
-    auto it = whyLive.find(cur);
-    if (it == whyLive.end())
-      if (auto *d = dyn_cast<Defined>(s))
-        if (auto *s = dyn_cast<InputSectionBase>(d->section))
-          it = whyLive.find(LiveObject{s});
-    assert(it != whyLive.end() &&
-           "all live objects should have a tracked reason for being live");
-    if (!it->second)
-      break;
-    cur = *it->second;
-    out += "\n" + std::string(indent, ' ');
-    if (std::holds_alternative<Symbol *>(cur)) {
-      auto *s = std::get<Symbol *>(cur);
+  std::string out;
+  int indent = 0;
+  for (std::optional<LiveObject> cur = s; cur; indent += 2) {
+    if (indent)
+      out += "\n" + std::string(indent, ' ');
+    if (std::holds_alternative<Symbol *>(*cur)) {
+      auto *s = std::get<Symbol *>(*cur);
       out += toString(*s) + " from " + toString(s->file);
     } else {
-      auto *s = std::get<InputSectionBase *>(cur);
+      auto *s = std::get<InputSectionBase *>(*cur);
       // TODO: Fancy formatting
       out += toString(s);
     }
+
+    auto it = whyLive.find(*cur);
+    if (it != whyLive.end()) {
+      // If there is a specific reason this object is live, report it.
+      if (!it->second)
+        break;
+      cur = *it->second;
+    } else {
+      // This object is live merely by being a member of its parent section, so
+      // report the parent.
+      InputSectionBase *parent = nullptr;
+      if (auto *d = dyn_cast<Defined>(s))
+        parent = dyn_cast<InputSectionBase>(d->section);
+      assert(parent &&
+             "all live objects should have a tracked reason for being live");
+      cur = LiveObject{parent};
+    }
   }
   message(out);
 }

>From 3bcd86789bf57b59a8e19c40af4a8d1eac3063ab Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 4 Dec 2024 16:03:17 -0800
Subject: [PATCH 15/30] If a specific symbol is referenced, it's the reason its
 section is alive

---
 lld/ELF/MarkLive.cpp | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 8287ce3f51af8f..17447330941004 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -218,11 +218,17 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec,
     return;
   sec->partition = sec->partition ? 1 : partition;
 
-  whyLive.try_emplace(sec, parent);
-  if (offset) {
-    Defined *sym = sec->getEnclosingSymbol(*offset);
-    if (sym)
-      whyLive.try_emplace(sym, parent);
+  Defined *sym = nullptr;
+  if (offset)
+    sym = sec->getEnclosingSymbol(*offset);
+  if (sym) {
+    // If a specific symbol is referenced, the parent makes it alive, and it
+    // (may) makes its section alive.
+    whyLive.try_emplace(sym, parent);
+    whyLive.try_emplace(sec, sym);
+  } else {
+    // Otherwise, the parent generically makes the section itself live.
+    whyLive.try_emplace(sec, parent);
   }
 
   // Add input section to the queue.

>From 94a480843697a9905e020e7fbc6d96b73b74ee6b Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <mysterymath at gmail.com>
Date: Thu, 5 Dec 2024 12:21:51 -0800
Subject: [PATCH 16/30] Determine canonical symbol for a reference

---
 lld/ELF/MarkLive.cpp | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 17447330941004..14f4694f69772a 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -54,8 +54,8 @@ template <class ELFT> class MarkLive {
   void moveToMain();
 
 private:
-  void enqueue(InputSectionBase *sec,
-               std::optional<uint64_t> offset = std::nullopt,
+  void enqueue(InputSectionBase *sec, uint64_t offset = 0,
+               Symbol *sym = nullptr,
                std::optional<LiveObject> parent = std::nullopt);
   void printWhyLive(Symbol *s) const;
   void markSymbol(Symbol *sym);
@@ -131,8 +131,13 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
     // group/SHF_LINK_ORDER rules (b) if the associated text section should be
     // discarded, marking the LSDA will unnecessarily retain the text section.
     if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) ||
-                      relSec->nextInSectionGroup)))
-      enqueue(relSec, offset, parent);
+                      relSec->nextInSectionGroup))) {
+      Symbol *canonicalSym = d;
+      if (offset >= d->value + d->size)
+        if (Symbol *s = relSec->getEnclosingSymbol(offset))
+          canonicalSym = s;
+      enqueue(relSec, offset, canonicalSym, parent);
+    }
     return;
   }
 
@@ -202,14 +207,13 @@ static bool isReserved(InputSectionBase *sec) {
 }
 
 template <class ELFT>
-void MarkLive<ELFT>::enqueue(InputSectionBase *sec,
-                             std::optional<uint64_t> offset,
-                             std::optional<LiveObject> parent) {
+void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
+                             Symbol *sym, std::optional<LiveObject> parent) {
   // Usually, a whole section is marked as live or dead, but in mergeable
   // (splittable) sections, each piece of data has independent liveness bit.
   // So we explicitly tell it which offset is in use.
   if (auto *ms = dyn_cast<MergeInputSection>(sec))
-    ms->getSectionPiece(offset.value_or(0)).live = true;
+    ms->getSectionPiece(offset).live = true;
 
   // Set Sec->Partition to the meet (i.e. the "minimum") of Partition and
   // Sec->Partition in the following lattice: 1 < other < 0. If Sec->Partition
@@ -218,9 +222,6 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec,
     return;
   sec->partition = sec->partition ? 1 : partition;
 
-  Defined *sym = nullptr;
-  if (offset)
-    sym = sec->getEnclosingSymbol(*offset);
   if (sym) {
     // If a specific symbol is referenced, the parent makes it alive, and it
     // (may) makes its section alive.
@@ -388,11 +389,11 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
       resolveReloc(sec, rel, false);
 
     for (InputSectionBase *isec : sec.dependentSections)
-      enqueue(isec, std::nullopt, &sec);
+      enqueue(isec, 0, nullptr, &sec);
 
     // Mark the next group member.
     if (sec.nextInSectionGroup)
-      enqueue(sec.nextInSectionGroup, std::nullopt, &sec);
+      enqueue(sec.nextInSectionGroup, 0, nullptr, &sec);
   }
 
   printWhyLive(ctx.symtab->find("bar"));

>From 22d47b722e1ad57c592b238fe9bde40fb50699a8 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <mysterymath at gmail.com>
Date: Thu, 5 Dec 2024 12:33:45 -0800
Subject: [PATCH 17/30] Refer to sections rather than STT_SECTION symbols

---
 lld/ELF/MarkLive.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 14f4694f69772a..4ff4616a110319 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -136,6 +136,8 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
       if (offset >= d->value + d->size)
         if (Symbol *s = relSec->getEnclosingSymbol(offset))
           canonicalSym = s;
+      if (canonicalSym->isSection())
+        canonicalSym = nullptr;
       enqueue(relSec, offset, canonicalSym, parent);
     }
     return;

>From e956dd56dced83d90a6b8ac0ae93c5ce8f748e42 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <mysterymath at gmail.com>
Date: Thu, 5 Dec 2024 12:43:35 -0800
Subject: [PATCH 18/30] Encode parent for named sections

---
 lld/ELF/MarkLive.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 4ff4616a110319..f3af48216111d9 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -151,7 +151,7 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
   }
 
   for (InputSectionBase *sec : cNamedSections.lookup(sym.getName()))
-    enqueue(sec);
+    enqueue(sec, 0, nullptr, parent);
 }
 
 // The .eh_frame section is an unfortunate special case.

>From b7fecebe09c35fe03b5a0b38503c2d57a6d6bbe6 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <mysterymath at gmail.com>
Date: Thu, 5 Dec 2024 12:47:18 -0800
Subject: [PATCH 19/30] No defaults; was missing things

---
 lld/ELF/MarkLive.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index f3af48216111d9..79834da67f5bce 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -54,9 +54,8 @@ template <class ELFT> class MarkLive {
   void moveToMain();
 
 private:
-  void enqueue(InputSectionBase *sec, uint64_t offset = 0,
-               Symbol *sym = nullptr,
-               std::optional<LiveObject> parent = std::nullopt);
+  void enqueue(InputSectionBase *sec, uint64_t offset, Symbol *sym,
+               std::optional<LiveObject> parent);
   void printWhyLive(Symbol *s) const;
   void markSymbol(Symbol *sym);
   void mark();
@@ -277,7 +276,7 @@ template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
 template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {
   if (auto *d = dyn_cast_or_null<Defined>(sym))
     if (auto *isec = dyn_cast_or_null<InputSectionBase>(d->section))
-      enqueue(isec, d->value);
+      enqueue(isec, d->value, sym, std::nullopt);
 }
 
 // This is the main function of the garbage collector.
@@ -324,7 +323,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
   }
   for (InputSectionBase *sec : ctx.inputSections) {
     if (sec->flags & SHF_GNU_RETAIN) {
-      enqueue(sec);
+      enqueue(sec, 0, nullptr, std::nullopt);
       continue;
     }
     if (sec->flags & SHF_LINK_ORDER)
@@ -363,7 +362,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
     // Preserve special sections and those which are specified in linker
     // script KEEP command.
     if (isReserved(sec) || ctx.script->shouldKeep(sec)) {
-      enqueue(sec);
+      enqueue(sec, 0, nullptr, std::nullopt);
     } else if ((!ctx.arg.zStartStopGC || sec->name.starts_with("__libc_")) &&
                isValidCIdentifier(sec->name)) {
       // As a workaround for glibc libc.a before 2.34
@@ -423,7 +422,7 @@ template <class ELFT> void MarkLive<ELFT>::moveToMain() {
       continue;
     if (ctx.symtab->find(("__start_" + sec->name).str()) ||
         ctx.symtab->find(("__stop_" + sec->name).str()))
-      enqueue(sec);
+      enqueue(sec, 0, nullptr, std::nullopt);
   }
 
   mark();

>From 715e0683ded64cdf58cf5b0d1979aabfe691c08b Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Fri, 6 Dec 2024 15:31:52 -0800
Subject: [PATCH 20/30] Add why-live option

---
 lld/ELF/Options.td | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c31875305952fb..93c7293c2ee151 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -559,6 +559,10 @@ defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and "
                        "__real_symbol references to symbol">,
             MetaVarName<"<symbol>">;
 
+defm why_live : EEq<"why-live", "Report a chain of references to <symbol-glob> that keeps it from "
+                                "being garbage collected">,
+                MetaVarName<"<symbol-glob>">;
+
 def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
   HelpText<"Linker option extensions">;
 

>From 6484056b14232501510ffc66aeac70ee2d98a06f Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Fri, 6 Dec 2024 15:34:06 -0800
Subject: [PATCH 21/30] toStr

---
 lld/ELF/MarkLive.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 79834da67f5bce..025ff9b728ab62 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -246,11 +246,11 @@ template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
       out += "\n" + std::string(indent, ' ');
     if (std::holds_alternative<Symbol *>(*cur)) {
       auto *s = std::get<Symbol *>(*cur);
-      out += toString(*s) + " from " + toString(s->file);
+      out += toStr(ctx, *s) + " from " + toStr(ctx, s->file);
     } else {
       auto *s = std::get<InputSectionBase *>(*cur);
       // TODO: Fancy formatting
-      out += toString(s);
+      out += toStr(ctx, s);
     }
 
     auto it = whyLive.find(*cur);

>From b42bd9474f263180f652fcbb939b95078c539462 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Fri, 6 Dec 2024 15:36:35 -0800
Subject: [PATCH 22/30] Better wording

---
 lld/ELF/Options.td | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 93c7293c2ee151..2e7ee35e762f2d 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -559,9 +559,9 @@ defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and "
                        "__real_symbol references to symbol">,
             MetaVarName<"<symbol>">;
 
-defm why_live : EEq<"why-live", "Report a chain of references to <symbol-glob> that keeps it from "
-                                "being garbage collected">,
-                MetaVarName<"<symbol-glob>">;
+defm why_live : EEq<"why-live", "Report a chain of references preventing garbage collection for "
+                                "each symbol matching <glob>">,
+                MetaVarName<"<glob>">;
 
 def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
   HelpText<"Linker option extensions">;

>From 7bca5d534c56e5b2c08a8886e5dcadc2b6d87193 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Mon, 9 Dec 2024 11:51:12 -0800
Subject: [PATCH 23/30] Parse whylive arg

---
 lld/ELF/Config.h   | 1 +
 lld/ELF/Driver.cpp | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index b2859486d58e93..12164f59993436 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -223,6 +223,7 @@ struct Config {
   llvm::StringRef thinLTOCacheDir;
   llvm::StringRef thinLTOIndexOnlyArg;
   llvm::StringRef whyExtract;
+  llvm::SmallVector<llvm::GlobPattern, 0> whyLive;
   llvm::StringRef cmseInputLib;
   llvm::StringRef cmseOutputLib;
   StringRef zBtiReport = "none";
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 13e8f8ce6df207..db0b2ea8afcf0a 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1472,6 +1472,15 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
   ctx.arg.warnSymbolOrdering =
       args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
   ctx.arg.whyExtract = args.getLastArgValue(OPT_why_extract);
+  for (opt::Arg *arg : args.filtered(OPT_why_live)) {
+    StringRef value(arg->getValue());
+    if (Expected<GlobPattern> pat = GlobPattern::create(arg->getValue())) {
+      ctx.arg.whyLive.emplace_back(std::move(*pat));
+    } else {
+      ErrAlways(ctx) << arg->getSpelling() << ": " << pat.takeError();
+      continue;
+    }
+  }
   ctx.arg.zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
   ctx.arg.zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
   ctx.arg.zForceBti = hasZOption(args, "force-bti");

>From e26bbf21ae29f2502d2b87b4aa616a8d8eaf93c0 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Mon, 9 Dec 2024 12:18:28 -0800
Subject: [PATCH 24/30] Connect whylive pattern matching

---
 lld/ELF/MarkLive.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 025ff9b728ab62..952fa7b9fe2996 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -397,8 +397,13 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
       enqueue(sec.nextInSectionGroup, 0, nullptr, &sec);
   }
 
-  printWhyLive(ctx.symtab->find("bar"));
-}
+  for (Symbol *sym : ctx.symtab->getSymbols()) {
+    if (llvm::any_of(ctx.arg.whyLive, [sym](const llvm::GlobPattern &pat) {
+          return pat.match(sym->getName());
+        }))
+      printWhyLive(sym);
+  }
+  }
 
 // Move the sections for some symbols to the main partition, specifically ifuncs
 // (because they can result in an IRELATIVE being added to the main partition's

>From 932cb8cefd0772fc3f703ffb895d28ba5471beec Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Mon, 9 Dec 2024 14:30:19 -0800
Subject: [PATCH 25/30] Don't trigger assertion for dead symbols

---
 lld/ELF/MarkLive.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 952fa7b9fe2996..27131d04378093 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -241,6 +241,8 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
 template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
   std::string out;
   int indent = 0;
+  if (!whyLive.contains(s))
+    return;
   for (std::optional<LiveObject> cur = s; cur; indent += 2) {
     if (indent)
       out += "\n" + std::string(indent, ' ');
@@ -403,7 +405,7 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
         }))
       printWhyLive(sym);
   }
-  }
+}
 
 // Move the sections for some symbols to the main partition, specifically ifuncs
 // (because they can result in an IRELATIVE being added to the main partition's

>From cf794154fc7db9b9b61ab36d508dddbc736f0797 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Thu, 19 Dec 2024 14:14:09 -0800
Subject: [PATCH 26/30] Optional args

---
 lld/ELF/MarkLive.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 27131d04378093..1b05b5845a29e6 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -54,8 +54,9 @@ template <class ELFT> class MarkLive {
   void moveToMain();
 
 private:
-  void enqueue(InputSectionBase *sec, uint64_t offset, Symbol *sym,
-               std::optional<LiveObject> parent);
+  void enqueue(InputSectionBase *sec, uint64_t offset = 0,
+               Symbol *sym = nullptr,
+               std::optional<LiveObject> parent = std::nullopt);
   void printWhyLive(Symbol *s) const;
   void markSymbol(Symbol *sym);
   void mark();
@@ -278,7 +279,7 @@ template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
 template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {
   if (auto *d = dyn_cast_or_null<Defined>(sym))
     if (auto *isec = dyn_cast_or_null<InputSectionBase>(d->section))
-      enqueue(isec, d->value, sym, std::nullopt);
+      enqueue(isec, d->value, sym);
 }
 
 // This is the main function of the garbage collector.
@@ -364,7 +365,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
     // Preserve special sections and those which are specified in linker
     // script KEEP command.
     if (isReserved(sec) || ctx.script->shouldKeep(sec)) {
-      enqueue(sec, 0, nullptr, std::nullopt);
+      enqueue(sec);
     } else if ((!ctx.arg.zStartStopGC || sec->name.starts_with("__libc_")) &&
                isValidCIdentifier(sec->name)) {
       // As a workaround for glibc libc.a before 2.34
@@ -429,7 +430,7 @@ template <class ELFT> void MarkLive<ELFT>::moveToMain() {
       continue;
     if (ctx.symtab->find(("__start_" + sec->name).str()) ||
         ctx.symtab->find(("__stop_" + sec->name).str()))
-      enqueue(sec, 0, nullptr, std::nullopt);
+      enqueue(sec);
   }
 
   mark();

>From 6c0232b5ab1bd82c219fed2d1f9b5763efa0b79d Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Thu, 19 Dec 2024 14:25:53 -0800
Subject: [PATCH 27/30] Simplify and correct canonical symbol detection

---
 lld/ELF/MarkLive.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 1b05b5845a29e6..7c98ea00acd64f 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -132,12 +132,13 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
     // discarded, marking the LSDA will unnecessarily retain the text section.
     if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) ||
                       relSec->nextInSectionGroup))) {
-      Symbol *canonicalSym = d;
-      if (offset >= d->value + d->size)
-        if (Symbol *s = relSec->getEnclosingSymbol(offset))
+      Symbol *canonicalSym = nullptr;
+      if (!d->isSection()) {
+        if (offset < d->value + d->size)
+          canonicalSym = d;
+        else if (Symbol *s = relSec->getEnclosingSymbol(offset))
           canonicalSym = s;
-      if (canonicalSym->isSection())
-        canonicalSym = nullptr;
+      }
       enqueue(relSec, offset, canonicalSym, parent);
     }
     return;

>From bf7cb603e3687946ffe58db23a39643735722b36 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Thu, 19 Dec 2024 14:46:18 -0800
Subject: [PATCH 28/30] Trust the symbol reference; it's all we've got

---
 lld/ELF/MarkLive.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 7c98ea00acd64f..ac1359a8f861d9 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -132,13 +132,10 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
     // discarded, marking the LSDA will unnecessarily retain the text section.
     if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) ||
                       relSec->nextInSectionGroup))) {
-      Symbol *canonicalSym = nullptr;
-      if (!d->isSection()) {
-        if (offset < d->value + d->size)
-          canonicalSym = d;
-        else if (Symbol *s = relSec->getEnclosingSymbol(offset))
+      Symbol *canonicalSym = d;
+      if (d->isSection())
+        if (Symbol *s = relSec->getEnclosingSymbol(offset))
           canonicalSym = s;
-      }
       enqueue(relSec, offset, canonicalSym, parent);
     }
     return;

>From 224769456be2d7e01ef45d122e58762a1d5e59b2 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 29 Jan 2025 15:46:25 -0800
Subject: [PATCH 29/30] Initial why-live test

---
 lld/test/ELF/why-live.s | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 lld/test/ELF/why-live.s

diff --git a/lld/test/ELF/why-live.s b/lld/test/ELF/why-live.s
new file mode 100644
index 00000000000000..34a6c38ec88e49
--- /dev/null
+++ b/lld/test/ELF/why-live.s
@@ -0,0 +1,17 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -n -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: ld.lld %t.o -o /dev/null --gc-sections --why-live=a | FileCheck %s
+
+# CHECK: blah
+
+.globl _start
+.section ._start,"ax",@progbits
+_start:
+jmp a
+
+.globl a
+.section .a,"ax",@progbits
+a:
+jmp a
+

>From 2d50c974b6939847bddcdffb6c0917e7d4887e55 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 29 Jan 2025 16:16:35 -0800
Subject: [PATCH 30/30] Improve syntax for printing

---
 lld/ELF/MarkLive.cpp | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index ac1359a8f861d9..214281c99e04b4 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -238,20 +238,23 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
 }
 
 template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
-  std::string out;
-  int indent = 0;
   if (!whyLive.contains(s))
     return;
-  for (std::optional<LiveObject> cur = s; cur; indent += 2) {
-    if (indent)
-      out += "\n" + std::string(indent, ' ');
+  auto diag = Msg(ctx);
+  bool first = true;
+  for (std::optional<LiveObject> cur = s; cur;) {
     if (std::holds_alternative<Symbol *>(*cur)) {
       auto *s = std::get<Symbol *>(*cur);
-      out += toStr(ctx, *s) + " from " + toStr(ctx, s->file);
+      // Match the syntax for sections below.
+      diag << toStr(ctx, s->file) << ":(" << toStr(ctx, *s) << ')';
     } else {
       auto *s = std::get<InputSectionBase *>(*cur);
-      // TODO: Fancy formatting
-      out += toStr(ctx, s);
+      diag << toStr(ctx, s);
+    }
+
+    if (first) {
+      diag << " live because:";
+      first = false;
     }
 
     auto it = whyLive.find(*cur);
@@ -270,8 +273,11 @@ template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
              "all live objects should have a tracked reason for being live");
       cur = LiveObject{parent};
     }
+
+    if (cur)
+      diag << "\n>>> referenced by "
+           << (std::holds_alternative<Symbol *>(*cur) ? "symbol " : "section ");
   }
-  message(out);
 }
 
 template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {



More information about the llvm-commits mailing list