[lld] a954bb1 - [ELF] Add --why-extract= to query why archive members/lazy object files are extracted

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 20 09:52:34 PDT 2021


Author: Fangrui Song
Date: 2021-09-20T09:52:30-07:00
New Revision: a954bb18b14363e133217e7f19aa95fbde2c7488

URL: https://github.com/llvm/llvm-project/commit/a954bb18b14363e133217e7f19aa95fbde2c7488
DIFF: https://github.com/llvm/llvm-project/commit/a954bb18b14363e133217e7f19aa95fbde2c7488.diff

LOG: [ELF] Add --why-extract= to query why archive members/lazy object files are extracted

Similar to D69607 but for archive member extraction unrelated to GC. This patch adds --why-extract=.

Prior art:

GNU ld -M prints
```
Archive member included to satisfy reference by file (symbol)

a.a(a.o)                      main.o (a)
b.a(b.o)                      (b())
```

-M is mainly for showing how input sections and symbol assignments map to
output sections (its output is often huge), so the archive member information
may appear ad hoc there.

Apple ld64
```
__Z1bv forced load of b.a(b.o)
_a forced load of a.a(a.o)
```

It doesn't say which file makes the reference.

Arm's proprietary linker
```
Selecting member vsnprintf.o(c_wfu.l) to define vsnprintf.
...
Loading member vsnprintf.o from c_wfu.l.
              definition:  vsnprintf
              reference :  _printf_a
```

---

--why-extract= gives the user the full data (which is much shorter than GNU ld
-Map). It is easy to track a chain of references to one archive member with a
one-liner, e.g.

```
% ld.lld main.o a_b.a b_c.a c.a -o /dev/null --why-extract=- | tee stdout
reference       extracted       symbol
main.o  a_b.a(a_b.o)    a
a_b.a(a_b.o)    b_c.a(b_c.o)    b()
b_c.a(b_c.o)    c.a(c.o)        c()

% ruby -ane 'BEGIN{p={}}; p[$F[1]]=[$F[0],$F[2]] if $.>1; END{x="c.a(c.o)"; while y=p[x]; puts "#{y[0]} extracts #{x} to resolve #{y[1]}"; x=y[0] end}' stdout
b_c.a(b_c.o) extracts c.a(c.o) to resolve c()
a_b.a(a_b.o) extracts b_c.a(b_c.o) to resolve b()
main.o extracts a_b.a(a_b.o) to resolve a
```

Archive member extraction happens before --gc-sections, so a reported reference
chain may not be a live path under --gc-sections, but I think it is a good
approximation in practice.

* Specifying a file avoids output interleaving with --verbose.
* Requiring `=` prevents accidentally overwriting an input file if the user forgets the `=`. (Most compiler drivers' long options accept the `=` form but not the space-separated form.)

Differential Revision: https://reviews.llvm.org/D109572

Added: 
    lld/test/ELF/why-extract.s

Modified: 
    lld/ELF/Config.h
    lld/ELF/Driver.cpp
    lld/ELF/MapFile.cpp
    lld/ELF/MapFile.h
    lld/ELF/Options.td
    lld/ELF/Symbols.cpp
    lld/ELF/Symbols.h
    lld/ELF/Writer.cpp
    lld/docs/ReleaseNotes.rst
    lld/docs/ld.lld.1

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index a3d28d57d39a..f9851d03e78b 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -127,6 +127,7 @@ struct Configuration {
   llvm::StringRef sysroot;
   llvm::StringRef thinLTOCacheDir;
   llvm::StringRef thinLTOIndexOnlyArg;
+  llvm::StringRef whyExtract;
   llvm::StringRef ltoBasicBlockSections;
   std::pair<llvm::StringRef, llvm::StringRef> thinLTOObjectSuffixReplace;
   std::pair<llvm::StringRef, llvm::StringRef> thinLTOPrefixReplace;

diff  --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 34667c4a7ece..6607c0fe15a4 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -94,6 +94,7 @@ bool elf::link(ArrayRef<const char *> args, bool canExitEarly,
     objectFiles.clear();
     sharedFiles.clear();
     backwardReferences.clear();
+    whyExtract.clear();
 
     tar = nullptr;
     memset(&in, 0, sizeof(in));
@@ -1171,6 +1172,7 @@ static void readConfigs(opt::InputArgList &args) {
   config->warnCommon = args.hasFlag(OPT_warn_common, OPT_no_warn_common, false);
   config->warnSymbolOrdering =
       args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
+  config->whyExtract = args.getLastArgValue(OPT_why_extract);
   config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
   config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
   config->zForceBti = hasZOption(args, "force-bti");
@@ -1696,13 +1698,16 @@ static void excludeLibs(opt::InputArgList &args) {
 }
 
 // Force Sym to be entered in the output.
-static void handleUndefined(Symbol *sym) {
+static void handleUndefined(Symbol *sym, const char *option) {
   // Since a symbol may not be used inside the program, LTO may
   // eliminate it. Mark the symbol as "used" to prevent it.
   sym->isUsedInRegularObj = true;
 
-  if (sym->isLazy())
-    sym->fetch();
+  if (!sym->isLazy())
+    return;
+  sym->fetch();
+  if (!config->whyExtract.empty())
+    whyExtract.emplace_back(option, sym->file, *sym);
 }
 
 // As an extension to GNU linkers, lld supports a variant of `-u`
@@ -1725,7 +1730,7 @@ static void handleUndefinedGlob(StringRef arg) {
   }
 
   for (Symbol *sym : syms)
-    handleUndefined(sym);
+    handleUndefined(sym, "--undefined-glob");
 }
 
 static void handleLibcall(StringRef name) {
@@ -2192,6 +2197,9 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
             e.message());
     if (auto e = tryCreateFile(config->mapFile))
       error("cannot open map file " + config->mapFile + ": " + e.message());
+    if (auto e = tryCreateFile(config->whyExtract))
+      error("cannot open --why-extract= file " + config->whyExtract + ": " +
+            e.message());
   }
   if (errorCount())
     return;
@@ -2246,7 +2254,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
 
   // If an entry symbol is in a static archive, pull out that file now.
   if (Symbol *sym = symtab->find(config->entry))
-    handleUndefined(sym);
+    handleUndefined(sym, "--entry");
 
   // Handle the `--undefined-glob <pattern>` options.
   for (StringRef pat : args::getStrings(args, OPT_undefined_glob))

diff  --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp
index 239c6c394840..c4690ae5aefd 100644
--- a/lld/ELF/MapFile.cpp
+++ b/lld/ELF/MapFile.cpp
@@ -215,6 +215,25 @@ void elf::writeMapFile() {
   }
 }
 
+void elf::writeWhyExtract() {
+  if (config->whyExtract.empty())
+    return;
+
+  std::error_code ec;
+  raw_fd_ostream os(config->whyExtract, ec, sys::fs::OF_None);
+  if (ec) {
+    error("cannot open --why-extract= file " + config->whyExtract + ": " +
+          ec.message());
+    return;
+  }
+
+  os << "reference\textracted\tsymbol\n";
+  for (auto &entry : whyExtract) {
+    os << std::get<0>(entry) << '\t' << toString(std::get<1>(entry)) << '\t'
+       << toString(std::get<2>(entry)) << '\n';
+  }
+}
+
 static void print(StringRef a, StringRef b) {
   lld::outs() << left_justify(a, 49) << " " << b << "\n";
 }

diff  --git a/lld/ELF/MapFile.h b/lld/ELF/MapFile.h
index c4da18f8ad7f..1b8c0168c0de 100644
--- a/lld/ELF/MapFile.h
+++ b/lld/ELF/MapFile.h
@@ -12,6 +12,7 @@
 namespace lld {
 namespace elf {
 void writeMapFile();
+void writeWhyExtract();
 void writeCrossReferenceTable();
 void writeArchiveStats();
 } // namespace elf

diff  --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index b5c1be5c600c..874399d5f41f 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -492,6 +492,8 @@ defm whole_archive: B<"whole-archive",
     "Force load of all members in a static library",
     "Do not force load of all members in a static library (default)">;
 
+def why_extract: JJ<"why-extract=">, HelpText<"Print to a file about why archive members are extracted">;
+
 defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and "
                        "__real_symbol references to symbol">,
             MetaVarName<"<symbol>">;

diff  --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index cef303f05f89..5f95a1b3c7ac 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -64,6 +64,8 @@ Defined *ElfSym::riscvGlobalPointer;
 Defined *ElfSym::tlsModuleBase;
 DenseMap<const Symbol *, std::pair<const InputFile *, const InputFile *>>
     elf::backwardReferences;
+SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>, 0>
+    elf::whyExtract;
 
 static uint64_t getSymVA(const Symbol &sym, int64_t &addend) {
   switch (sym.kind()) {
@@ -321,6 +323,11 @@ void elf::printTraceSymbol(const Symbol *sym) {
   message(toString(sym->file) + s + sym->getName());
 }
 
+static void recordWhyExtract(const InputFile *reference,
+                             const InputFile &extracted, const Symbol &sym) {
+  whyExtract.emplace_back(toString(reference), &extracted, sym);
+}
+
 void elf::maybeWarnUnorderableSymbol(const Symbol *sym) {
   if (!config->warnSymbolOrdering)
     return;
@@ -533,6 +540,9 @@ void Symbol::resolveUndefined(const Undefined &other) {
                    file->groupId < other.file->groupId;
     fetch();
 
+    if (!config->whyExtract.empty())
+      recordWhyExtract(other.file, *file, *this);
+
     // We don't report backward references to weak symbols as they can be
     // overridden later.
     //
@@ -742,7 +752,10 @@ template <class LazyT> void Symbol::resolveLazy(const LazyT &other) {
     return;
   }
 
+  const InputFile *oldFile = file;
   other.fetch();
+  if (!config->whyExtract.empty())
+    recordWhyExtract(oldFile, *file, *this);
 }
 
 void Symbol::resolveShared(const SharedSymbol &other) {

diff  --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index d486beb26d0e..4f5cc3f67631 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -20,6 +20,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/ELF.h"
+#include <tuple>
 
 namespace lld {
 // Returns a string representation for a symbol for diagnostics.
@@ -582,6 +583,11 @@ extern llvm::DenseMap<const Symbol *,
                       std::pair<const InputFile *, const InputFile *>>
     backwardReferences;
 
+// A tuple of (reference, extractedFile, sym). Used by --why-extract=.
+extern SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>,
+                   0>
+    whyExtract;
+
 } // namespace elf
 } // namespace lld
 

diff  --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index a9b0854fad92..0fdc64852b9d 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -622,11 +622,12 @@ template <class ELFT> void Writer<ELFT>::run() {
     for (OutputSection *sec : outputSections)
       sec->addr = 0;
 
-  // Handle --print-map(-M)/--Map, --cref and --print-archive-stats=. Dump them
-  // before checkSections() because the files may be useful in case
-  // checkSections() or openFile() fails, for example, due to an erroneous file
-  // size.
+  // Handle --print-map(-M)/--Map, --why-extract=, --cref and
+  // --print-archive-stats=. Dump them before checkSections() because the files
+  // may be useful in case checkSections() or openFile() fails, for example, due
+  // to an erroneous file size.
   writeMapFile();
+  writeWhyExtract();
   writeCrossReferenceTable();
   writeArchiveStats();
 

diff  --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 1ab3f9971fbf..91a1417b0db7 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -26,6 +26,8 @@ ELF Improvements
 
 * ``--export-dynamic-symbol-list`` has been added.
   (`D107317 <https://reviews.llvm.org/D107317>`_)
+* ``--why-extract`` has been added to query why archive members/lazy object files are extracted.
+  (`D109572 <https://reviews.llvm.org/D109572>`_)
 * ``e_entry`` no longer falls back to the address of ``.text`` if the entry symbol does not exist.
   Instead, a value of 0 will be written.
   (`D110014 <https://reviews.llvm.org/D110014>`_)

diff  --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index cce00837c542..dda76c468d32 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -658,6 +658,8 @@ linkers, and may be removed in the future.
 Report unresolved symbols as warnings.
 .It Fl -whole-archive
 Force load of all members in a static library.
+.It Fl -why-extract Ns = Ns Ar file
+Print to a file about why archive members are extracted.
 .It Fl -wrap Ns = Ns Ar symbol
 Redirect
 .Ar symbol

diff  --git a/lld/test/ELF/why-extract.s b/lld/test/ELF/why-extract.s
new file mode 100644
index 000000000000..c5b5784c4a0f
--- /dev/null
+++ b/lld/test/ELF/why-extract.s
@@ -0,0 +1,86 @@
+# REQUIRES: x86
+
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/main.s -o %t/main.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/a.s -o %t/a.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/a_b.s -o %t/a_b.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/b.s -o %t/b.o
+# RUN: llvm-ar rc %t/a.a %t/a.o
+# RUN: llvm-ar rc %t/a_b.a %t/a_b.o
+# RUN: llvm-ar rc %t/b.a %t/b.o
+# RUN: cd %t
+
+## Nothing is extracted from an archive. The file is created with just a header.
+# RUN: ld.lld main.o a.o b.a -o /dev/null --why-extract=why1.txt
+# RUN: FileCheck %s --input-file=why1.txt --check-prefix=CHECK1 --match-full-lines --strict-whitespace
+
+#      CHECK1:reference	extracted	symbol
+#  CHECK1-NOT:{{.}}
+
+## Some archive members are extracted.
+# RUN: ld.lld main.o a_b.a b.a -o /dev/null --why-extract=why2.txt
+# RUN: FileCheck %s --input-file=why2.txt --check-prefix=CHECK2 --match-full-lines --strict-whitespace
+
+#      CHECK2:reference	extracted	symbol
+# CHECK2-NEXT:main.o	a_b.a(a_b.o)	a
+# CHECK2-NEXT:a_b.a(a_b.o)	b.a(b.o)	b()
+
+## Check that backward references are supported.
+## - means stdout.
+# RUN: ld.lld b.a a_b.a main.o -o /dev/null --why-extract=- | FileCheck %s --check-prefix=CHECK3
+
+#      CHECK3:reference	extracted	symbol
+# CHECK3-NEXT:a_b.a(a_b.o)	b.a(b.o)	b()
+# CHECK3-NEXT:main.o	a_b.a(a_b.o)	a
+
+# RUN: ld.lld main.o a_b.a b.a -o /dev/null --no-demangle --why-extract=- | FileCheck %s --check-prefix=MANGLED
+
+# MANGLED: a_b.a(a_b.o)	b.a(b.o)	_Z1bv
+
+# RUN: ld.lld main.o a.a b.a -o /dev/null -u _Z1bv --why-extract=- | FileCheck %s --check-prefix=UNDEFINED
+
+## We insert -u symbol before processing other files, so its name is <internal>.
+## This is not ideal.
+# UNDEFINED: <internal>	b.a(b.o)	b()
+
+# RUN: ld.lld main.o a.a b.a -o /dev/null --undefined-glob '_Z1b*' --why-extract=- | FileCheck %s --check-prefix=UNDEFINED_GLOB
+
+# UNDEFINED_GLOB: --undefined-glob	b.a(b.o)	b()
+
+# RUN: ld.lld main.o a.a b.a -o /dev/null -e _Z1bv --why-extract=- | FileCheck %s --check-prefix=ENTRY
+
+# ENTRY: --entry	b.a(b.o)	b()
+
+# RUN: ld.lld main.o b.a -o /dev/null -T a.lds --why-extract=- | FileCheck %s --check-prefix=SCRIPT
+
+# SCRIPT: <internal>	b.a(b.o)	b()
+
+# RUN: ld.lld main.o --start-lib a_b.o b.o --end-lib -o /dev/null --why-extract=- | FileCheck %s --check-prefix=LAZY
+
+# LAZY: main.o	a_b.o	a
+# LAZY: a_b.o	b.o	b()
+
+# RUN: not ld.lld -shared main.o -o /dev/null --why-extract=/ 2>&1 | FileCheck %s --check-prefix=ERR
+
+# ERR: error: cannot open --why-extract= file /: {{.*}}
+
+#--- main.s
+.globl _start
+_start:
+  call a
+
+#--- a.s
+.globl a
+a:
+
+#--- a_b.s
+.globl a
+a:
+  call _Z1bv
+
+#--- b.s
+.globl _Z1bv
+_Z1bv:
+
+#--- a.lds
+a = _Z1bv;


        


More information about the llvm-commits mailing list