[lld] [LLD][COFF] Move symbol mangling and lookup helpers to SymbolTable class (NFC) (PR #122836)
Jacek Caban via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 13 17:00:44 PST 2025
https://github.com/cjacek created https://github.com/llvm/llvm-project/pull/122836
This refactor prepares for further ARM64X hybrid support, where these helpers will need to work with either the native or EC symbol table based on context.
From 380b16adee4f62b54e777f493ca99b24df162837 Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek at codeweavers.com>
Date: Tue, 14 Jan 2025 01:36:04 +0100
Subject: [PATCH] [LLD][COFF] Move symbol mangling and lookup helpers to
SymbolTable class (NFC)
This refactor prepares for further ARM64X hybrid support, where these helpers will
need to work with either the native or EC symbol table based on context.
---
lld/COFF/Driver.cpp | 162 ++++++++-------------------------------
lld/COFF/Driver.h | 19 -----
lld/COFF/SymbolTable.cpp | 106 +++++++++++++++++++++++++
lld/COFF/SymbolTable.h | 17 ++++
4 files changed, 153 insertions(+), 151 deletions(-)
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 0d89457046a500..6af6b4f7307664 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Parallel.h"
@@ -172,23 +171,10 @@ static std::future<MBErrPair> createFutureForFile(std::string path) {
});
}
-// Symbol names are mangled by prepending "_" on x86.
-StringRef LinkerDriver::mangle(StringRef sym) {
- assert(ctx.config.machine != IMAGE_FILE_MACHINE_UNKNOWN);
- if (ctx.config.machine == I386)
- return saver().save("_" + sym);
- return sym;
-}
-
llvm::Triple::ArchType LinkerDriver::getArch() {
return getMachineArchType(ctx.config.machine);
}
-bool LinkerDriver::findUnderscoreMangle(StringRef sym) {
- Symbol *s = ctx.symtab.findMangle(mangle(sym));
- return s && !isa<Undefined>(s);
-}
-
static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) {
if (mt == IMAGE_FILE_MACHINE_UNKNOWN)
return true;
@@ -486,7 +472,7 @@ void LinkerDriver::parseDirectives(InputFile *file) {
SmallVector<StringRef, 2> vec;
e.split(vec, ',');
for (StringRef sym : vec)
- excludedSymbols.insert(mangle(sym));
+ excludedSymbols.insert(file->symtab.mangle(sym));
}
// https://docs.microsoft.com/en-us/cpp/preprocessor/comment-c-cpp?view=msvc-160
@@ -505,7 +491,8 @@ void LinkerDriver::parseDirectives(InputFile *file) {
case OPT_entry:
if (!arg->getValue()[0])
Fatal(ctx) << "missing entry point symbol name";
- ctx.config.entry = file->symtab.addGCRoot(mangle(arg->getValue()), true);
+ ctx.config.entry =
+ file->symtab.addGCRoot(file->symtab.mangle(arg->getValue()), true);
break;
case OPT_failifmismatch:
checkFailIfMismatch(arg->getValue(), file);
@@ -805,97 +792,6 @@ void LinkerDriver::addLibSearchPaths() {
}
}
-void LinkerDriver::addUndefinedGlob(StringRef arg) {
- Expected<GlobPattern> pat = GlobPattern::create(arg);
- if (!pat) {
- Err(ctx) << "/includeglob: " << toString(pat.takeError());
- return;
- }
-
- SmallVector<Symbol *, 0> syms;
- ctx.symtab.forEachSymbol([&syms, &pat](Symbol *sym) {
- if (pat->match(sym->getName())) {
- syms.push_back(sym);
- }
- });
-
- for (Symbol *sym : syms)
- ctx.symtab.addGCRoot(sym->getName());
-}
-
-StringRef LinkerDriver::mangleMaybe(Symbol *s) {
- // If the plain symbol name has already been resolved, do nothing.
- Undefined *unmangled = dyn_cast<Undefined>(s);
- if (!unmangled)
- return "";
-
- // Otherwise, see if a similar, mangled symbol exists in the symbol table.
- Symbol *mangled = ctx.symtab.findMangle(unmangled->getName());
- if (!mangled)
- return "";
-
- // If we find a similar mangled symbol, make this an alias to it and return
- // its name.
- Log(ctx) << unmangled->getName() << " aliased to " << mangled->getName();
- unmangled->setWeakAlias(ctx.symtab.addUndefined(mangled->getName()));
- return mangled->getName();
-}
-
-// Windows specific -- find default entry point name.
-//
-// There are four different entry point functions for Windows executables,
-// each of which corresponds to a user-defined "main" function. This function
-// infers an entry point from a user-defined "main" function.
-StringRef LinkerDriver::findDefaultEntry() {
- assert(ctx.config.subsystem != IMAGE_SUBSYSTEM_UNKNOWN &&
- "must handle /subsystem before calling this");
-
- if (ctx.config.mingw)
- return mangle(ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI
- ? "WinMainCRTStartup"
- : "mainCRTStartup");
-
- if (ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) {
- if (findUnderscoreMangle("wWinMain")) {
- if (!findUnderscoreMangle("WinMain"))
- return mangle("wWinMainCRTStartup");
- Warn(ctx) << "found both wWinMain and WinMain; using latter";
- }
- return mangle("WinMainCRTStartup");
- }
- if (findUnderscoreMangle("wmain")) {
- if (!findUnderscoreMangle("main"))
- return mangle("wmainCRTStartup");
- Warn(ctx) << "found both wmain and main; using latter";
- }
- return mangle("mainCRTStartup");
-}
-
-WindowsSubsystem LinkerDriver::inferSubsystem() {
- if (ctx.config.dll)
- return IMAGE_SUBSYSTEM_WINDOWS_GUI;
- if (ctx.config.mingw)
- return IMAGE_SUBSYSTEM_WINDOWS_CUI;
- // Note that link.exe infers the subsystem from the presence of these
- // functions even if /entry: or /nodefaultlib are passed which causes them
- // to not be called.
- bool haveMain = findUnderscoreMangle("main");
- bool haveWMain = findUnderscoreMangle("wmain");
- bool haveWinMain = findUnderscoreMangle("WinMain");
- bool haveWWinMain = findUnderscoreMangle("wWinMain");
- if (haveMain || haveWMain) {
- if (haveWinMain || haveWWinMain) {
- Warn(ctx) << "found " << (haveMain ? "main" : "wmain") << " and "
- << (haveWinMain ? "WinMain" : "wWinMain")
- << "; defaulting to /subsystem:console";
- }
- return IMAGE_SUBSYSTEM_WINDOWS_CUI;
- }
- if (haveWinMain || haveWWinMain)
- return IMAGE_SUBSYSTEM_WINDOWS_GUI;
- return IMAGE_SUBSYSTEM_UNKNOWN;
-}
-
uint64_t LinkerDriver::getDefaultImageBase() {
if (ctx.config.is64())
return ctx.config.dll ? 0x180000000 : 0x140000000;
@@ -1539,7 +1435,7 @@ void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) {
SmallVector<StringRef, 2> vec;
StringRef(arg->getValue()).split(vec, ',');
for (StringRef sym : vec)
- exporter.addExcludedSymbol(mangle(sym));
+ exporter.addExcludedSymbol(ctx.symtab.mangle(sym));
}
ctx.symtab.forEachSymbol([&](Symbol *s) {
@@ -2455,7 +2351,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// and after the early return when just writing an import library.
if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) {
llvm::TimeTraceScope timeScope("Infer subsystem");
- config->subsystem = inferSubsystem();
+ config->subsystem = ctx.symtab.inferSubsystem();
if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN)
Fatal(ctx) << "subsystem must be defined";
}
@@ -2466,7 +2362,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (auto *arg = args.getLastArg(OPT_entry)) {
if (!arg->getValue()[0])
Fatal(ctx) << "missing entry point symbol name";
- config->entry = ctx.symtab.addGCRoot(mangle(arg->getValue()), true);
+ config->entry =
+ ctx.symtab.addGCRoot(ctx.symtab.mangle(arg->getValue()), true);
} else if (!config->entry && !config->noEntry) {
if (args.hasArg(OPT_dll)) {
StringRef s = (config->machine == I386) ? "__DllMainCRTStartup@12"
@@ -2474,11 +2371,12 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
config->entry = ctx.symtab.addGCRoot(s, true);
} else if (config->driverWdm) {
// /driver:wdm implies /entry:_NtProcessStartup
- config->entry = ctx.symtab.addGCRoot(mangle("_NtProcessStartup"), true);
+ config->entry =
+ ctx.symtab.addGCRoot(ctx.symtab.mangle("_NtProcessStartup"), true);
} else {
// Windows specific -- If entry point name is not given, we need to
// infer that from user-defined entry name.
- StringRef s = findDefaultEntry();
+ StringRef s = ctx.symtab.findDefaultEntry();
if (s.empty())
Fatal(ctx) << "entry point must be defined";
config->entry = ctx.symtab.addGCRoot(s, true);
@@ -2568,24 +2466,24 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
config->imageBase = getDefaultImageBase();
ctx.forEachSymtab([&](SymbolTable &symtab) {
- symtab.addSynthetic(mangle("__ImageBase"), nullptr);
+ symtab.addSynthetic(symtab.mangle("__ImageBase"), nullptr);
if (symtab.machine == I386) {
symtab.addAbsolute("___safe_se_handler_table", 0);
symtab.addAbsolute("___safe_se_handler_count", 0);
}
- symtab.addAbsolute(mangle("__guard_fids_count"), 0);
- symtab.addAbsolute(mangle("__guard_fids_table"), 0);
- symtab.addAbsolute(mangle("__guard_flags"), 0);
- symtab.addAbsolute(mangle("__guard_iat_count"), 0);
- symtab.addAbsolute(mangle("__guard_iat_table"), 0);
- symtab.addAbsolute(mangle("__guard_longjmp_count"), 0);
- symtab.addAbsolute(mangle("__guard_longjmp_table"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_fids_count"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_fids_table"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_flags"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_iat_count"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_iat_table"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_longjmp_count"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_longjmp_table"), 0);
// Needed for MSVC 2017 15.5 CRT.
- symtab.addAbsolute(mangle("__enclave_config"), 0);
+ symtab.addAbsolute(symtab.mangle("__enclave_config"), 0);
// Needed for MSVC 2019 16.8 CRT.
- symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0);
- symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_eh_cont_count"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_eh_cont_table"), 0);
if (symtab.isEC()) {
symtab.addAbsolute("__arm64x_extra_rfe_table", 0);
@@ -2606,16 +2504,16 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
}
if (config->pseudoRelocs) {
- symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
- symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
+ symtab.addAbsolute(symtab.mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
+ symtab.addAbsolute(symtab.mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
}
if (config->mingw) {
- symtab.addAbsolute(mangle("__CTOR_LIST__"), 0);
- symtab.addAbsolute(mangle("__DTOR_LIST__"), 0);
+ symtab.addAbsolute(symtab.mangle("__CTOR_LIST__"), 0);
+ symtab.addAbsolute(symtab.mangle("__DTOR_LIST__"), 0);
}
if (config->debug || config->buildIDHash != BuildIDHash::None)
if (symtab.findUnderscore("__buildid"))
- symtab.addUndefined(mangle("__buildid"));
+ symtab.addUndefined(symtab.mangle("__buildid"));
});
// This code may add new undefined symbols to the link, which may enqueue more
@@ -2627,7 +2525,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// Windows specific -- if entry point is not found,
// search for its mangled names.
if (config->entry)
- mangleMaybe(config->entry);
+ ctx.symtab.mangleMaybe(config->entry);
// Windows specific -- Make sure we resolve all dllexported symbols.
for (Export &e : config->exports) {
@@ -2635,7 +2533,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
continue;
e.sym = ctx.symtab.addGCRoot(e.name, !e.data);
if (e.source != ExportSource::Directives)
- e.symbolName = mangleMaybe(e.sym);
+ e.symbolName = ctx.symtab.mangleMaybe(e.sym);
}
// Add weak aliases. Weak aliases is a mechanism to give remaining
@@ -2675,7 +2573,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// Windows specific -- if __load_config_used can be resolved, resolve it.
if (ctx.symtab.findUnderscore("_load_config_used"))
- ctx.symtab.addGCRoot(mangle("_load_config_used"));
+ ctx.symtab.addGCRoot(ctx.symtab.mangle("_load_config_used"));
if (args.hasArg(OPT_include_optional)) {
// Handle /includeoptional
@@ -2688,7 +2586,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// Handle /includeglob
for (StringRef pat : args::getStrings(args, OPT_incl_glob))
- addUndefinedGlob(pat);
+ ctx.symtab.addUndefinedGlob(pat);
// Create wrapped symbols for -wrap option.
std::vector<WrappedSymbol> wrapped = addWrappedSymbols(ctx, args);
diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h
index 9d4f1cbfcb5841..4558f68c041fa4 100644
--- a/lld/COFF/Driver.h
+++ b/lld/COFF/Driver.h
@@ -106,8 +106,6 @@ class LinkerDriver {
StringRef findLib(StringRef filename);
StringRef findLibMinGW(StringRef filename);
- bool findUnderscoreMangle(StringRef sym);
-
// Determines the location of the sysroot based on `args`, environment, etc.
void detectWinSysRoot(const llvm::opt::InputArgList &args);
@@ -115,9 +113,6 @@ class LinkerDriver {
// config.machine has been set.
void addWinSysRootLibSearchPaths();
- // Symbol names are mangled by prepending "_" on x86.
- StringRef mangle(StringRef sym);
-
void setMachine(llvm::COFF::MachineTypes machine);
llvm::Triple::ArchType getArch();
@@ -173,20 +168,6 @@ class LinkerDriver {
std::set<std::string> visitedLibs;
- void addUndefinedGlob(StringRef arg);
-
- StringRef mangleMaybe(Symbol *s);
-
- // Windows specific -- "main" is not the only main function in Windows.
- // You can choose one from these four -- {w,}{WinMain,main}.
- // There are four different entry point functions for them,
- // {w,}{WinMain,main}CRTStartup, respectively. The linker needs to
- // choose the right one depending on which "main" function is defined.
- // This function looks up the symbol table and resolve corresponding
- // entry point name.
- StringRef findDefaultEntry();
- WindowsSubsystem inferSubsystem();
-
void addBuffer(std::unique_ptr<MemoryBuffer> mb, bool wholeArchive,
bool lazy);
void addArchiveBuffer(MemoryBufferRef mbref, StringRef symName,
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index b2f3ffe780e5dc..7c43ada3d136e2 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -21,12 +21,14 @@
#include "llvm/IR/Mangler.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>
using namespace llvm;
+using namespace llvm::COFF;
using namespace llvm::support;
namespace lld::coff {
@@ -1022,6 +1024,110 @@ Symbol *SymbolTable::findMangle(StringRef name) {
return findByPrefix("?" + name.substr(1) + "@@Y");
}
+bool SymbolTable::findUnderscoreMangle(StringRef sym) {
+ Symbol *s = findMangle(mangle(sym));
+ return s && !isa<Undefined>(s);
+}
+
+// Symbol names are mangled by prepending "_" on x86.
+StringRef SymbolTable::mangle(StringRef sym) {
+ assert(machine != IMAGE_FILE_MACHINE_UNKNOWN);
+ if (machine == I386)
+ return saver().save("_" + sym);
+ return sym;
+}
+
+StringRef SymbolTable::mangleMaybe(Symbol *s) {
+ // If the plain symbol name has already been resolved, do nothing.
+ Undefined *unmangled = dyn_cast<Undefined>(s);
+ if (!unmangled)
+ return "";
+
+ // Otherwise, see if a similar, mangled symbol exists in the symbol table.
+ Symbol *mangled = findMangle(unmangled->getName());
+ if (!mangled)
+ return "";
+
+ // If we find a similar mangled symbol, make this an alias to it and return
+ // its name.
+ Log(ctx) << unmangled->getName() << " aliased to " << mangled->getName();
+ unmangled->setWeakAlias(addUndefined(mangled->getName()));
+ return mangled->getName();
+}
+
+// Windows specific -- find default entry point name.
+//
+// There are four different entry point functions for Windows executables,
+// each of which corresponds to a user-defined "main" function. This function
+// infers an entry point from a user-defined "main" function.
+StringRef SymbolTable::findDefaultEntry() {
+ assert(ctx.config.subsystem != IMAGE_SUBSYSTEM_UNKNOWN &&
+ "must handle /subsystem before calling this");
+
+ if (ctx.config.mingw)
+ return mangle(ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI
+ ? "WinMainCRTStartup"
+ : "mainCRTStartup");
+
+ if (ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) {
+ if (findUnderscoreMangle("wWinMain")) {
+ if (!findUnderscoreMangle("WinMain"))
+ return mangle("wWinMainCRTStartup");
+ Warn(ctx) << "found both wWinMain and WinMain; using latter";
+ }
+ return mangle("WinMainCRTStartup");
+ }
+ if (findUnderscoreMangle("wmain")) {
+ if (!findUnderscoreMangle("main"))
+ return mangle("wmainCRTStartup");
+ Warn(ctx) << "found both wmain and main; using latter";
+ }
+ return mangle("mainCRTStartup");
+}
+
+WindowsSubsystem SymbolTable::inferSubsystem() {
+ if (ctx.config.dll)
+ return IMAGE_SUBSYSTEM_WINDOWS_GUI;
+ if (ctx.config.mingw)
+ return IMAGE_SUBSYSTEM_WINDOWS_CUI;
+ // Note that link.exe infers the subsystem from the presence of these
+ // functions even if /entry: or /nodefaultlib are passed which causes them
+ // to not be called.
+ bool haveMain = findUnderscoreMangle("main");
+ bool haveWMain = findUnderscoreMangle("wmain");
+ bool haveWinMain = findUnderscoreMangle("WinMain");
+ bool haveWWinMain = findUnderscoreMangle("wWinMain");
+ if (haveMain || haveWMain) {
+ if (haveWinMain || haveWWinMain) {
+ Warn(ctx) << "found " << (haveMain ? "main" : "wmain") << " and "
+ << (haveWinMain ? "WinMain" : "wWinMain")
+ << "; defaulting to /subsystem:console";
+ }
+ return IMAGE_SUBSYSTEM_WINDOWS_CUI;
+ }
+ if (haveWinMain || haveWWinMain)
+ return IMAGE_SUBSYSTEM_WINDOWS_GUI;
+ return IMAGE_SUBSYSTEM_UNKNOWN;
+}
+
+void SymbolTable::addUndefinedGlob(StringRef arg) {
+ Expected<GlobPattern> pat = GlobPattern::create(arg);
+ if (!pat) {
+ Err(ctx) << "/includeglob: " << toString(pat.takeError());
+ return;
+ }
+
+ SmallVector<Symbol *, 0> syms;
+ forEachSymbol([&syms, &pat](Symbol *sym) {
+ if (pat->match(sym->getName())) {
+ syms.push_back(sym);
+ }
+ });
+
+ for (Symbol *sym : syms)
+ addGCRoot(sym->getName());
+}
+
Symbol *SymbolTable::addUndefined(StringRef name) {
return addUndefined(name, nullptr, false);
}
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index 4c749ae059d277..1de0b3e1deac3e 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -74,11 +74,27 @@ class SymbolTable {
Symbol *find(StringRef name) const;
Symbol *findUnderscore(StringRef name) const;
+ void addUndefinedGlob(StringRef arg);
+
// Occasionally we have to resolve an undefined symbol to its
// mangled symbol. This function tries to find a mangled name
// for U from the symbol table, and if found, set the symbol as
// a weak alias for U.
Symbol *findMangle(StringRef name);
+ StringRef mangleMaybe(Symbol *s);
+
+ // Symbol names are mangled by prepending "_" on x86.
+ StringRef mangle(StringRef sym);
+
+ // Windows specific -- "main" is not the only main function in Windows.
+ // You can choose one from these four -- {w,}{WinMain,main}.
+ // There are four different entry point functions for them,
+ // {w,}{WinMain,main}CRTStartup, respectively. The linker needs to
+ // choose the right one depending on which "main" function is defined.
+ // This function looks up the symbol table and resolve corresponding
+ // entry point name.
+ StringRef findDefaultEntry();
+ WindowsSubsystem inferSubsystem();
// Build a set of COFF objects representing the combined contents of
// BitcodeFiles and add them to the symbol table. Called after all files are
@@ -152,6 +168,7 @@ class SymbolTable {
/// Same as insert(Name), but also sets isUsedInRegularObj.
std::pair<Symbol *, bool> insert(StringRef name, InputFile *f);
+ bool findUnderscoreMangle(StringRef sym);
std::vector<Symbol *> getSymsWithPrefix(StringRef prefix);
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> symMap;
More information about the llvm-commits
mailing list