[lld] [ELF] Fix unnecessary inclusion of unreferenced provide symbols (PR #84512)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 8 08:20:24 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld

@llvm/pr-subscribers-lld-elf

Author: None (partaror)

<details>
<summary>Changes</summary>

Previously, linker was unnecessarily including a PROVIDE symbol which was referenced by another unused PROVIDE symbol. For example, if a linker script contained the below code and 'not_used_sym' provide symbol is not included, then linker was still unnecessarily including 'foo' PROVIDE symbol because it was referenced by 'not_used_sym'. This commit fixes this behavior.

PROVIDE(not_used_sym = foo)
PROVIDE(foo = 0x1000)

This commit fixes this behavior by using dfs-like algorithm to find all the symbols referenced in provide expressions of included provide symbols.

Closes #<!-- -->74771

---
Full diff: https://github.com/llvm/llvm-project/pull/84512.diff


4 Files Affected:

- (modified) lld/ELF/Driver.cpp (+51-7) 
- (modified) lld/ELF/LinkerScript.h (+10) 
- (modified) lld/ELF/ScriptParser.cpp (+12-1) 
- (modified) lld/test/ELF/linkerscript/symbolreferenced.s (+16-3) 


``````````diff
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 24faa1753f1e3d..fff81d17755945 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -2659,6 +2659,56 @@ static void postParseObjectFile(ELFFileBase *file) {
   }
 }
 
+// Returns true if the provide symbol should be added to the link.
+bool shouldAddProvideSym(StringRef symName) {
+  Symbol *b = symtab.find(symName);
+  return b && !b->isDefined() && !b->isCommon();
+}
+
+// Add symbols referred by the provide symbol to the symbol table.
+// This function must only be called for provide symbols that should be added
+// to the link.
+static void
+addProvideSymReferences(StringRef provideSym,
+                        llvm::StringSet<> &addedRefsFromProvideSym) {
+
+  if (addedRefsFromProvideSym.count(provideSym))
+    return;
+  assert((shouldAddProvideSym(provideSym) || !symtab.find(provideSym)) &&
+         "This function must only be called for provide symbols that should be "
+         "added to the link.");
+  addedRefsFromProvideSym.insert(provideSym);
+  for (StringRef name : script->provideMap[provideSym].keys()) {
+    script->referencedSymbols.push_back(name);
+    if (script->provideMap.count(name) &&
+        (shouldAddProvideSym(name) || !symtab.find(name)) &&
+        !addedRefsFromProvideSym.count(name))
+      addProvideSymReferences(name, addedRefsFromProvideSym);
+  }
+}
+
+// Add symbols that are referenced in the linker script.
+// Symbols referenced in a PROVIDE command are only added to the symbol table if
+// the PROVIDE command actually provides the symbol.
+static void addScriptReferencedSymbols() {
+  // Keeps track of references from which PROVIDE symbols have been added to the
+  // symbol table.
+  llvm::StringSet<> addedRefsFromProvideSym;
+  for (StringRef provideSym : script->provideMap.keys()) {
+    if (!addedRefsFromProvideSym.count(provideSym) &&
+        shouldAddProvideSym(provideSym))
+      addProvideSymReferences(provideSym, addedRefsFromProvideSym);
+  }
+
+  // Some symbols (such as __ehdr_start) are defined lazily only when there
+  // are undefined symbols for them, so we add these to trigger that logic.
+  for (StringRef name : script->referencedSymbols) {
+    Symbol *sym = addUnusedUndefined(name);
+    sym->isUsedInRegularObj = true;
+    sym->referenced = true;
+  }
+}
+
 // Do actual linking. Note that when this function is called,
 // all linker scripts have already been parsed.
 void LinkerDriver::link(opt::InputArgList &args) {
@@ -2735,13 +2785,7 @@ void LinkerDriver::link(opt::InputArgList &args) {
   config->hasDynSymTab =
       !ctx.sharedFiles.empty() || config->isPic || config->exportDynamic;
 
-  // Some symbols (such as __ehdr_start) are defined lazily only when there
-  // are undefined symbols for them, so we add these to trigger that logic.
-  for (StringRef name : script->referencedSymbols) {
-    Symbol *sym = addUnusedUndefined(name);
-    sym->isUsedInRegularObj = true;
-    sym->referenced = true;
-  }
+  addScriptReferencedSymbols();
 
   // Prevent LTO from removing any definition referenced by -u.
   for (StringRef name : config->undefined)
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 18eaf58b785e37..9cd2d3f273d70f 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -16,7 +16,9 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/Support/Compiler.h"
 #include <cstddef>
 #include <cstdint>
@@ -379,6 +381,14 @@ class LinkerScript final {
 
   // Sections that will be warned/errored by --orphan-handling.
   SmallVector<const InputSectionBase *, 0> orphanSections;
+
+  // Stores the mapping: provide symbol -> symbols referred in the provide
+  // expression. For example, if the PROVIDE command is:
+  //
+  // PROVIDE(v = a + b + c);
+  //
+  // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
+  llvm::StringMap<llvm::StringSet<>> provideMap;
 };
 
 LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 3bb1de99480f30..e9953f44156e31 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Support/TimeProfiler.h"
 #include <cassert>
 #include <limits>
+#include <optional>
 #include <vector>
 
 using namespace llvm;
@@ -138,6 +139,10 @@ class ScriptParser final : ScriptLexer {
 
   // A set to detect an INCLUDE() cycle.
   StringSet<> seen;
+
+  // If we are currently parsing a PROVIDE|PROVIDE_HIDDEN command,
+  // then this member is set to the provide symbol name.
+  std::optional<llvm::StringRef> activeProvideSym;
 };
 } // namespace
 
@@ -1055,6 +1060,9 @@ SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) {
       ;
     return nullptr;
   }
+  llvm::SaveAndRestore saveActiveProvideSym(activeProvideSym);
+  if (provide)
+    activeProvideSym = name;
   SymbolAssignment *cmd = readSymbolAssignment(name);
   cmd->provide = provide;
   cmd->hidden = hidden;
@@ -1570,7 +1578,10 @@ Expr ScriptParser::readPrimary() {
     tok = unquote(tok);
   else if (!isValidSymbolName(tok))
     setError("malformed number: " + tok);
-  script->referencedSymbols.push_back(tok);
+  if (activeProvideSym)
+    script->provideMap[activeProvideSym.value()].insert(tok);
+  else
+    script->referencedSymbols.push_back(tok);
   return [=] { return script->getSymbolValue(tok, location); };
 }
 
diff --git a/lld/test/ELF/linkerscript/symbolreferenced.s b/lld/test/ELF/linkerscript/symbolreferenced.s
index ba7a7721ea9697..587446b1dcb52a 100644
--- a/lld/test/ELF/linkerscript/symbolreferenced.s
+++ b/lld/test/ELF/linkerscript/symbolreferenced.s
@@ -23,9 +23,16 @@
 
 # CHECK:      0000000000001000 a f1
 # CHECK-NEXT: 0000000000001000 A f2
-# CHECK-NEXT: 0000000000001000 a g1
-# CHECK-NEXT: 0000000000001000 A g2
+# CHECK-NEXT: 0000000000001000 A f3
+# CHECK-NEXT: 0000000000001000 A f4
+# CHECK-NEXT: 0000000000001000 A f5
+# CHECK-NEXT: 0000000000001000 A f6
+# CHECK-NEXT: 0000000000001000 A f7
 # CHECK-NEXT: 0000000000001000 A newsym
+# CHECK-NOT: g1
+# CHECK-NOT: g2
+# CHECK-NOT: unused
+# CHECK-NOT: another_unused
 
 # RUN: not ld.lld -T chain2.t a.o 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
 # ERR-COUNT-3: error: chain2.t:1: symbol not found: undef
@@ -40,13 +47,19 @@ patatino:
   movl newsym, %eax
 
 #--- chain.t
-PROVIDE(f2 = 0x1000);
+PROVIDE(f7 = 0x1000);
+PROVIDE(f5 = f6);
+PROVIDE(f6 = f7);
+PROVIDE(f4 = f5);
+PROVIDE(f3 = f4);
+PROVIDE(f2 = f3);
 PROVIDE_HIDDEN(f1 = f2);
 PROVIDE(newsym = f1);
 
 PROVIDE(g2 = 0x1000);
 PROVIDE_HIDDEN(g1 = g2);
 PROVIDE(unused = g1);
+PROVIDE_HIDDEN(another_unused = g1);
 
 #--- chain2.t
 PROVIDE(f2 = undef);

``````````

</details>


https://github.com/llvm/llvm-project/pull/84512


More information about the llvm-commits mailing list