[lld] [ELF] Fix unnecessary inclusion of unreferenced provide symbols (PR #84512)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 8 08:20:24 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld
@llvm/pr-subscribers-lld-elf
Author: None (partaror)
<details>
<summary>Changes</summary>
Previously, linker was unnecessarily including a PROVIDE symbol which was referenced by another unused PROVIDE symbol. For example, if a linker script contained the below code and 'not_used_sym' provide symbol is not included, then linker was still unnecessarily including 'foo' PROVIDE symbol because it was referenced by 'not_used_sym'. This commit fixes this behavior.
PROVIDE(not_used_sym = foo)
PROVIDE(foo = 0x1000)
This commit fixes this behavior by using dfs-like algorithm to find all the symbols referenced in provide expressions of included provide symbols.
Closes #<!-- -->74771
---
Full diff: https://github.com/llvm/llvm-project/pull/84512.diff
4 Files Affected:
- (modified) lld/ELF/Driver.cpp (+51-7)
- (modified) lld/ELF/LinkerScript.h (+10)
- (modified) lld/ELF/ScriptParser.cpp (+12-1)
- (modified) lld/test/ELF/linkerscript/symbolreferenced.s (+16-3)
``````````diff
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 24faa1753f1e3d..fff81d17755945 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -2659,6 +2659,56 @@ static void postParseObjectFile(ELFFileBase *file) {
}
}
+// Returns true if the provide symbol should be added to the link.
+bool shouldAddProvideSym(StringRef symName) {
+ Symbol *b = symtab.find(symName);
+ return b && !b->isDefined() && !b->isCommon();
+}
+
+// Add symbols referred by the provide symbol to the symbol table.
+// This function must only be called for provide symbols that should be added
+// to the link.
+static void
+addProvideSymReferences(StringRef provideSym,
+ llvm::StringSet<> &addedRefsFromProvideSym) {
+
+ if (addedRefsFromProvideSym.count(provideSym))
+ return;
+ assert((shouldAddProvideSym(provideSym) || !symtab.find(provideSym)) &&
+ "This function must only be called for provide symbols that should be "
+ "added to the link.");
+ addedRefsFromProvideSym.insert(provideSym);
+ for (StringRef name : script->provideMap[provideSym].keys()) {
+ script->referencedSymbols.push_back(name);
+ if (script->provideMap.count(name) &&
+ (shouldAddProvideSym(name) || !symtab.find(name)) &&
+ !addedRefsFromProvideSym.count(name))
+ addProvideSymReferences(name, addedRefsFromProvideSym);
+ }
+}
+
+// Add symbols that are referenced in the linker script.
+// Symbols referenced in a PROVIDE command are only added to the symbol table if
+// the PROVIDE command actually provides the symbol.
+static void addScriptReferencedSymbols() {
+ // Keeps track of references from which PROVIDE symbols have been added to the
+ // symbol table.
+ llvm::StringSet<> addedRefsFromProvideSym;
+ for (StringRef provideSym : script->provideMap.keys()) {
+ if (!addedRefsFromProvideSym.count(provideSym) &&
+ shouldAddProvideSym(provideSym))
+ addProvideSymReferences(provideSym, addedRefsFromProvideSym);
+ }
+
+ // Some symbols (such as __ehdr_start) are defined lazily only when there
+ // are undefined symbols for them, so we add these to trigger that logic.
+ for (StringRef name : script->referencedSymbols) {
+ Symbol *sym = addUnusedUndefined(name);
+ sym->isUsedInRegularObj = true;
+ sym->referenced = true;
+ }
+}
+
// Do actual linking. Note that when this function is called,
// all linker scripts have already been parsed.
void LinkerDriver::link(opt::InputArgList &args) {
@@ -2735,13 +2785,7 @@ void LinkerDriver::link(opt::InputArgList &args) {
config->hasDynSymTab =
!ctx.sharedFiles.empty() || config->isPic || config->exportDynamic;
- // Some symbols (such as __ehdr_start) are defined lazily only when there
- // are undefined symbols for them, so we add these to trigger that logic.
- for (StringRef name : script->referencedSymbols) {
- Symbol *sym = addUnusedUndefined(name);
- sym->isUsedInRegularObj = true;
- sym->referenced = true;
- }
+ addScriptReferencedSymbols();
// Prevent LTO from removing any definition referenced by -u.
for (StringRef name : config->undefined)
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 18eaf58b785e37..9cd2d3f273d70f 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -16,7 +16,9 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
@@ -379,6 +381,14 @@ class LinkerScript final {
// Sections that will be warned/errored by --orphan-handling.
SmallVector<const InputSectionBase *, 0> orphanSections;
+
+ // Stores the mapping: provide symbol -> symbols referred in the provide
+ // expression. For example, if the PROVIDE command is:
+ //
+ // PROVIDE(v = a + b + c);
+ //
+ // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
+ llvm::StringMap<llvm::StringSet<>> provideMap;
};
LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 3bb1de99480f30..e9953f44156e31 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/TimeProfiler.h"
#include <cassert>
#include <limits>
+#include <optional>
#include <vector>
using namespace llvm;
@@ -138,6 +139,10 @@ class ScriptParser final : ScriptLexer {
// A set to detect an INCLUDE() cycle.
StringSet<> seen;
+
+ // If we are currently parsing a PROVIDE|PROVIDE_HIDDEN command,
+ // then this member is set to the provide symbol name.
+ std::optional<llvm::StringRef> activeProvideSym;
};
} // namespace
@@ -1055,6 +1060,9 @@ SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) {
;
return nullptr;
}
+ llvm::SaveAndRestore saveActiveProvideSym(activeProvideSym);
+ if (provide)
+ activeProvideSym = name;
SymbolAssignment *cmd = readSymbolAssignment(name);
cmd->provide = provide;
cmd->hidden = hidden;
@@ -1570,7 +1578,10 @@ Expr ScriptParser::readPrimary() {
tok = unquote(tok);
else if (!isValidSymbolName(tok))
setError("malformed number: " + tok);
- script->referencedSymbols.push_back(tok);
+ if (activeProvideSym)
+ script->provideMap[activeProvideSym.value()].insert(tok);
+ else
+ script->referencedSymbols.push_back(tok);
return [=] { return script->getSymbolValue(tok, location); };
}
diff --git a/lld/test/ELF/linkerscript/symbolreferenced.s b/lld/test/ELF/linkerscript/symbolreferenced.s
index ba7a7721ea9697..587446b1dcb52a 100644
--- a/lld/test/ELF/linkerscript/symbolreferenced.s
+++ b/lld/test/ELF/linkerscript/symbolreferenced.s
@@ -23,9 +23,16 @@
# CHECK: 0000000000001000 a f1
# CHECK-NEXT: 0000000000001000 A f2
-# CHECK-NEXT: 0000000000001000 a g1
-# CHECK-NEXT: 0000000000001000 A g2
+# CHECK-NEXT: 0000000000001000 A f3
+# CHECK-NEXT: 0000000000001000 A f4
+# CHECK-NEXT: 0000000000001000 A f5
+# CHECK-NEXT: 0000000000001000 A f6
+# CHECK-NEXT: 0000000000001000 A f7
# CHECK-NEXT: 0000000000001000 A newsym
+# CHECK-NOT: g1
+# CHECK-NOT: g2
+# CHECK-NOT: unused
+# CHECK-NOT: another_unused
# RUN: not ld.lld -T chain2.t a.o 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
# ERR-COUNT-3: error: chain2.t:1: symbol not found: undef
@@ -40,13 +47,19 @@ patatino:
movl newsym, %eax
#--- chain.t
-PROVIDE(f2 = 0x1000);
+PROVIDE(f7 = 0x1000);
+PROVIDE(f5 = f6);
+PROVIDE(f6 = f7);
+PROVIDE(f4 = f5);
+PROVIDE(f3 = f4);
+PROVIDE(f2 = f3);
PROVIDE_HIDDEN(f1 = f2);
PROVIDE(newsym = f1);
PROVIDE(g2 = 0x1000);
PROVIDE_HIDDEN(g1 = g2);
PROVIDE(unused = g1);
+PROVIDE_HIDDEN(another_unused = g1);
#--- chain2.t
PROVIDE(f2 = undef);
``````````
</details>
https://github.com/llvm/llvm-project/pull/84512
More information about the llvm-commits
mailing list