[lld] f22af59 - [LLD][COFF] Move symbol mangling and lookup helpers to SymbolTable class (NFC) (#122836)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 15 06:21:10 PST 2025
Author: Jacek Caban
Date: 2025-01-15T15:21:06+01:00
New Revision: f22af59336d45d2a000f1033be0203340bf8ad36
URL: https://github.com/llvm/llvm-project/commit/f22af59336d45d2a000f1033be0203340bf8ad36
DIFF: https://github.com/llvm/llvm-project/commit/f22af59336d45d2a000f1033be0203340bf8ad36.diff
LOG: [LLD][COFF] Move symbol mangling and lookup helpers to SymbolTable class (NFC) (#122836)
This refactor prepares for further ARM64X hybrid support, where these
helpers will need to work with either the native or EC symbol table
based on context.
Added:
Modified:
lld/COFF/Driver.cpp
lld/COFF/Driver.h
lld/COFF/SymbolTable.cpp
lld/COFF/SymbolTable.h
Removed:
################################################################################
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 0d89457046a500..6af6b4f7307664 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Parallel.h"
@@ -172,23 +171,10 @@ static std::future<MBErrPair> createFutureForFile(std::string path) {
});
}
-// Symbol names are mangled by prepending "_" on x86.
-StringRef LinkerDriver::mangle(StringRef sym) {
- assert(ctx.config.machine != IMAGE_FILE_MACHINE_UNKNOWN);
- if (ctx.config.machine == I386)
- return saver().save("_" + sym);
- return sym;
-}
-
llvm::Triple::ArchType LinkerDriver::getArch() {
return getMachineArchType(ctx.config.machine);
}
-bool LinkerDriver::findUnderscoreMangle(StringRef sym) {
- Symbol *s = ctx.symtab.findMangle(mangle(sym));
- return s && !isa<Undefined>(s);
-}
-
static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) {
if (mt == IMAGE_FILE_MACHINE_UNKNOWN)
return true;
@@ -486,7 +472,7 @@ void LinkerDriver::parseDirectives(InputFile *file) {
SmallVector<StringRef, 2> vec;
e.split(vec, ',');
for (StringRef sym : vec)
- excludedSymbols.insert(mangle(sym));
+ excludedSymbols.insert(file->symtab.mangle(sym));
}
// https://docs.microsoft.com/en-us/cpp/preprocessor/comment-c-cpp?view=msvc-160
@@ -505,7 +491,8 @@ void LinkerDriver::parseDirectives(InputFile *file) {
case OPT_entry:
if (!arg->getValue()[0])
Fatal(ctx) << "missing entry point symbol name";
- ctx.config.entry = file->symtab.addGCRoot(mangle(arg->getValue()), true);
+ ctx.config.entry =
+ file->symtab.addGCRoot(file->symtab.mangle(arg->getValue()), true);
break;
case OPT_failifmismatch:
checkFailIfMismatch(arg->getValue(), file);
@@ -805,97 +792,6 @@ void LinkerDriver::addLibSearchPaths() {
}
}
-void LinkerDriver::addUndefinedGlob(StringRef arg) {
- Expected<GlobPattern> pat = GlobPattern::create(arg);
- if (!pat) {
- Err(ctx) << "/includeglob: " << toString(pat.takeError());
- return;
- }
-
- SmallVector<Symbol *, 0> syms;
- ctx.symtab.forEachSymbol([&syms, &pat](Symbol *sym) {
- if (pat->match(sym->getName())) {
- syms.push_back(sym);
- }
- });
-
- for (Symbol *sym : syms)
- ctx.symtab.addGCRoot(sym->getName());
-}
-
-StringRef LinkerDriver::mangleMaybe(Symbol *s) {
- // If the plain symbol name has already been resolved, do nothing.
- Undefined *unmangled = dyn_cast<Undefined>(s);
- if (!unmangled)
- return "";
-
- // Otherwise, see if a similar, mangled symbol exists in the symbol table.
- Symbol *mangled = ctx.symtab.findMangle(unmangled->getName());
- if (!mangled)
- return "";
-
- // If we find a similar mangled symbol, make this an alias to it and return
- // its name.
- Log(ctx) << unmangled->getName() << " aliased to " << mangled->getName();
- unmangled->setWeakAlias(ctx.symtab.addUndefined(mangled->getName()));
- return mangled->getName();
-}
-
-// Windows specific -- find default entry point name.
-//
-// There are four different entry point functions for Windows executables,
-// each of which corresponds to a user-defined "main" function. This function
-// infers an entry point from a user-defined "main" function.
-StringRef LinkerDriver::findDefaultEntry() {
- assert(ctx.config.subsystem != IMAGE_SUBSYSTEM_UNKNOWN &&
- "must handle /subsystem before calling this");
-
- if (ctx.config.mingw)
- return mangle(ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI
- ? "WinMainCRTStartup"
- : "mainCRTStartup");
-
- if (ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) {
- if (findUnderscoreMangle("wWinMain")) {
- if (!findUnderscoreMangle("WinMain"))
- return mangle("wWinMainCRTStartup");
- Warn(ctx) << "found both wWinMain and WinMain; using latter";
- }
- return mangle("WinMainCRTStartup");
- }
- if (findUnderscoreMangle("wmain")) {
- if (!findUnderscoreMangle("main"))
- return mangle("wmainCRTStartup");
- Warn(ctx) << "found both wmain and main; using latter";
- }
- return mangle("mainCRTStartup");
-}
-
-WindowsSubsystem LinkerDriver::inferSubsystem() {
- if (ctx.config.dll)
- return IMAGE_SUBSYSTEM_WINDOWS_GUI;
- if (ctx.config.mingw)
- return IMAGE_SUBSYSTEM_WINDOWS_CUI;
- // Note that link.exe infers the subsystem from the presence of these
- // functions even if /entry: or /nodefaultlib are passed which causes them
- // to not be called.
- bool haveMain = findUnderscoreMangle("main");
- bool haveWMain = findUnderscoreMangle("wmain");
- bool haveWinMain = findUnderscoreMangle("WinMain");
- bool haveWWinMain = findUnderscoreMangle("wWinMain");
- if (haveMain || haveWMain) {
- if (haveWinMain || haveWWinMain) {
- Warn(ctx) << "found " << (haveMain ? "main" : "wmain") << " and "
- << (haveWinMain ? "WinMain" : "wWinMain")
- << "; defaulting to /subsystem:console";
- }
- return IMAGE_SUBSYSTEM_WINDOWS_CUI;
- }
- if (haveWinMain || haveWWinMain)
- return IMAGE_SUBSYSTEM_WINDOWS_GUI;
- return IMAGE_SUBSYSTEM_UNKNOWN;
-}
-
uint64_t LinkerDriver::getDefaultImageBase() {
if (ctx.config.is64())
return ctx.config.dll ? 0x180000000 : 0x140000000;
@@ -1539,7 +1435,7 @@ void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) {
SmallVector<StringRef, 2> vec;
StringRef(arg->getValue()).split(vec, ',');
for (StringRef sym : vec)
- exporter.addExcludedSymbol(mangle(sym));
+ exporter.addExcludedSymbol(ctx.symtab.mangle(sym));
}
ctx.symtab.forEachSymbol([&](Symbol *s) {
@@ -2455,7 +2351,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// and after the early return when just writing an import library.
if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) {
llvm::TimeTraceScope timeScope("Infer subsystem");
- config->subsystem = inferSubsystem();
+ config->subsystem = ctx.symtab.inferSubsystem();
if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN)
Fatal(ctx) << "subsystem must be defined";
}
@@ -2466,7 +2362,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (auto *arg = args.getLastArg(OPT_entry)) {
if (!arg->getValue()[0])
Fatal(ctx) << "missing entry point symbol name";
- config->entry = ctx.symtab.addGCRoot(mangle(arg->getValue()), true);
+ config->entry =
+ ctx.symtab.addGCRoot(ctx.symtab.mangle(arg->getValue()), true);
} else if (!config->entry && !config->noEntry) {
if (args.hasArg(OPT_dll)) {
StringRef s = (config->machine == I386) ? "__DllMainCRTStartup@12"
@@ -2474,11 +2371,12 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
config->entry = ctx.symtab.addGCRoot(s, true);
} else if (config->driverWdm) {
// /driver:wdm implies /entry:_NtProcessStartup
- config->entry = ctx.symtab.addGCRoot(mangle("_NtProcessStartup"), true);
+ config->entry =
+ ctx.symtab.addGCRoot(ctx.symtab.mangle("_NtProcessStartup"), true);
} else {
// Windows specific -- If entry point name is not given, we need to
// infer that from user-defined entry name.
- StringRef s = findDefaultEntry();
+ StringRef s = ctx.symtab.findDefaultEntry();
if (s.empty())
Fatal(ctx) << "entry point must be defined";
config->entry = ctx.symtab.addGCRoot(s, true);
@@ -2568,24 +2466,24 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
config->imageBase = getDefaultImageBase();
ctx.forEachSymtab([&](SymbolTable &symtab) {
- symtab.addSynthetic(mangle("__ImageBase"), nullptr);
+ symtab.addSynthetic(symtab.mangle("__ImageBase"), nullptr);
if (symtab.machine == I386) {
symtab.addAbsolute("___safe_se_handler_table", 0);
symtab.addAbsolute("___safe_se_handler_count", 0);
}
- symtab.addAbsolute(mangle("__guard_fids_count"), 0);
- symtab.addAbsolute(mangle("__guard_fids_table"), 0);
- symtab.addAbsolute(mangle("__guard_flags"), 0);
- symtab.addAbsolute(mangle("__guard_iat_count"), 0);
- symtab.addAbsolute(mangle("__guard_iat_table"), 0);
- symtab.addAbsolute(mangle("__guard_longjmp_count"), 0);
- symtab.addAbsolute(mangle("__guard_longjmp_table"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_fids_count"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_fids_table"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_flags"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_iat_count"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_iat_table"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_longjmp_count"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_longjmp_table"), 0);
// Needed for MSVC 2017 15.5 CRT.
- symtab.addAbsolute(mangle("__enclave_config"), 0);
+ symtab.addAbsolute(symtab.mangle("__enclave_config"), 0);
// Needed for MSVC 2019 16.8 CRT.
- symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0);
- symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_eh_cont_count"), 0);
+ symtab.addAbsolute(symtab.mangle("__guard_eh_cont_table"), 0);
if (symtab.isEC()) {
symtab.addAbsolute("__arm64x_extra_rfe_table", 0);
@@ -2606,16 +2504,16 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
}
if (config->pseudoRelocs) {
- symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
- symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
+ symtab.addAbsolute(symtab.mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
+ symtab.addAbsolute(symtab.mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
}
if (config->mingw) {
- symtab.addAbsolute(mangle("__CTOR_LIST__"), 0);
- symtab.addAbsolute(mangle("__DTOR_LIST__"), 0);
+ symtab.addAbsolute(symtab.mangle("__CTOR_LIST__"), 0);
+ symtab.addAbsolute(symtab.mangle("__DTOR_LIST__"), 0);
}
if (config->debug || config->buildIDHash != BuildIDHash::None)
if (symtab.findUnderscore("__buildid"))
- symtab.addUndefined(mangle("__buildid"));
+ symtab.addUndefined(symtab.mangle("__buildid"));
});
// This code may add new undefined symbols to the link, which may enqueue more
@@ -2627,7 +2525,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// Windows specific -- if entry point is not found,
// search for its mangled names.
if (config->entry)
- mangleMaybe(config->entry);
+ ctx.symtab.mangleMaybe(config->entry);
// Windows specific -- Make sure we resolve all dllexported symbols.
for (Export &e : config->exports) {
@@ -2635,7 +2533,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
continue;
e.sym = ctx.symtab.addGCRoot(e.name, !e.data);
if (e.source != ExportSource::Directives)
- e.symbolName = mangleMaybe(e.sym);
+ e.symbolName = ctx.symtab.mangleMaybe(e.sym);
}
// Add weak aliases. Weak aliases is a mechanism to give remaining
@@ -2675,7 +2573,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// Windows specific -- if __load_config_used can be resolved, resolve it.
if (ctx.symtab.findUnderscore("_load_config_used"))
- ctx.symtab.addGCRoot(mangle("_load_config_used"));
+ ctx.symtab.addGCRoot(ctx.symtab.mangle("_load_config_used"));
if (args.hasArg(OPT_include_optional)) {
// Handle /includeoptional
@@ -2688,7 +2586,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// Handle /includeglob
for (StringRef pat : args::getStrings(args, OPT_incl_glob))
- addUndefinedGlob(pat);
+ ctx.symtab.addUndefinedGlob(pat);
// Create wrapped symbols for -wrap option.
std::vector<WrappedSymbol> wrapped = addWrappedSymbols(ctx, args);
diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h
index 9d4f1cbfcb5841..4558f68c041fa4 100644
--- a/lld/COFF/Driver.h
+++ b/lld/COFF/Driver.h
@@ -106,8 +106,6 @@ class LinkerDriver {
StringRef findLib(StringRef filename);
StringRef findLibMinGW(StringRef filename);
- bool findUnderscoreMangle(StringRef sym);
-
// Determines the location of the sysroot based on `args`, environment, etc.
void detectWinSysRoot(const llvm::opt::InputArgList &args);
@@ -115,9 +113,6 @@ class LinkerDriver {
// config.machine has been set.
void addWinSysRootLibSearchPaths();
- // Symbol names are mangled by prepending "_" on x86.
- StringRef mangle(StringRef sym);
-
void setMachine(llvm::COFF::MachineTypes machine);
llvm::Triple::ArchType getArch();
@@ -173,20 +168,6 @@ class LinkerDriver {
std::set<std::string> visitedLibs;
- void addUndefinedGlob(StringRef arg);
-
- StringRef mangleMaybe(Symbol *s);
-
- // Windows specific -- "main" is not the only main function in Windows.
- // You can choose one from these four -- {w,}{WinMain,main}.
- // There are four different entry point functions for them,
- // {w,}{WinMain,main}CRTStartup, respectively. The linker needs to
- // choose the right one depending on which "main" function is defined.
- // This function looks up the symbol table and resolve corresponding
- // entry point name.
- StringRef findDefaultEntry();
- WindowsSubsystem inferSubsystem();
-
void addBuffer(std::unique_ptr<MemoryBuffer> mb, bool wholeArchive,
bool lazy);
void addArchiveBuffer(MemoryBufferRef mbref, StringRef symName,
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index b2f3ffe780e5dc..7c43ada3d136e2 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -21,12 +21,14 @@
#include "llvm/IR/Mangler.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>
using namespace llvm;
+using namespace llvm::COFF;
using namespace llvm::support;
namespace lld::coff {
@@ -1022,6 +1024,110 @@ Symbol *SymbolTable::findMangle(StringRef name) {
return findByPrefix("?" + name.substr(1) + "@@Y");
}
+bool SymbolTable::findUnderscoreMangle(StringRef sym) {
+ Symbol *s = findMangle(mangle(sym));
+ return s && !isa<Undefined>(s);
+}
+
+// Symbol names are mangled by prepending "_" on x86.
+StringRef SymbolTable::mangle(StringRef sym) {
+ assert(machine != IMAGE_FILE_MACHINE_UNKNOWN);
+ if (machine == I386)
+ return saver().save("_" + sym);
+ return sym;
+}
+
+StringRef SymbolTable::mangleMaybe(Symbol *s) {
+ // If the plain symbol name has already been resolved, do nothing.
+ Undefined *unmangled = dyn_cast<Undefined>(s);
+ if (!unmangled)
+ return "";
+
+ // Otherwise, see if a similar, mangled symbol exists in the symbol table.
+ Symbol *mangled = findMangle(unmangled->getName());
+ if (!mangled)
+ return "";
+
+ // If we find a similar mangled symbol, make this an alias to it and return
+ // its name.
+ Log(ctx) << unmangled->getName() << " aliased to " << mangled->getName();
+ unmangled->setWeakAlias(addUndefined(mangled->getName()));
+ return mangled->getName();
+}
+
+// Windows specific -- find default entry point name.
+//
+// There are four different entry point functions for Windows executables,
+// each of which corresponds to a user-defined "main" function. This function
+// infers an entry point from a user-defined "main" function.
+StringRef SymbolTable::findDefaultEntry() {
+ assert(ctx.config.subsystem != IMAGE_SUBSYSTEM_UNKNOWN &&
+ "must handle /subsystem before calling this");
+
+ if (ctx.config.mingw)
+ return mangle(ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI
+ ? "WinMainCRTStartup"
+ : "mainCRTStartup");
+
+ if (ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) {
+ if (findUnderscoreMangle("wWinMain")) {
+ if (!findUnderscoreMangle("WinMain"))
+ return mangle("wWinMainCRTStartup");
+ Warn(ctx) << "found both wWinMain and WinMain; using latter";
+ }
+ return mangle("WinMainCRTStartup");
+ }
+ if (findUnderscoreMangle("wmain")) {
+ if (!findUnderscoreMangle("main"))
+ return mangle("wmainCRTStartup");
+ Warn(ctx) << "found both wmain and main; using latter";
+ }
+ return mangle("mainCRTStartup");
+}
+
+WindowsSubsystem SymbolTable::inferSubsystem() {
+ if (ctx.config.dll)
+ return IMAGE_SUBSYSTEM_WINDOWS_GUI;
+ if (ctx.config.mingw)
+ return IMAGE_SUBSYSTEM_WINDOWS_CUI;
+ // Note that link.exe infers the subsystem from the presence of these
+ // functions even if /entry: or /nodefaultlib are passed which causes them
+ // to not be called.
+ bool haveMain = findUnderscoreMangle("main");
+ bool haveWMain = findUnderscoreMangle("wmain");
+ bool haveWinMain = findUnderscoreMangle("WinMain");
+ bool haveWWinMain = findUnderscoreMangle("wWinMain");
+ if (haveMain || haveWMain) {
+ if (haveWinMain || haveWWinMain) {
+ Warn(ctx) << "found " << (haveMain ? "main" : "wmain") << " and "
+ << (haveWinMain ? "WinMain" : "wWinMain")
+ << "; defaulting to /subsystem:console";
+ }
+ return IMAGE_SUBSYSTEM_WINDOWS_CUI;
+ }
+ if (haveWinMain || haveWWinMain)
+ return IMAGE_SUBSYSTEM_WINDOWS_GUI;
+ return IMAGE_SUBSYSTEM_UNKNOWN;
+}
+
+void SymbolTable::addUndefinedGlob(StringRef arg) {
+ Expected<GlobPattern> pat = GlobPattern::create(arg);
+ if (!pat) {
+ Err(ctx) << "/includeglob: " << toString(pat.takeError());
+ return;
+ }
+
+ SmallVector<Symbol *, 0> syms;
+ forEachSymbol([&syms, &pat](Symbol *sym) {
+ if (pat->match(sym->getName())) {
+ syms.push_back(sym);
+ }
+ });
+
+ for (Symbol *sym : syms)
+ addGCRoot(sym->getName());
+}
+
Symbol *SymbolTable::addUndefined(StringRef name) {
return addUndefined(name, nullptr, false);
}
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index 4c749ae059d277..1de0b3e1deac3e 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -74,11 +74,27 @@ class SymbolTable {
Symbol *find(StringRef name) const;
Symbol *findUnderscore(StringRef name) const;
+ void addUndefinedGlob(StringRef arg);
+
// Occasionally we have to resolve an undefined symbol to its
// mangled symbol. This function tries to find a mangled name
// for U from the symbol table, and if found, set the symbol as
// a weak alias for U.
Symbol *findMangle(StringRef name);
+ StringRef mangleMaybe(Symbol *s);
+
+ // Symbol names are mangled by prepending "_" on x86.
+ StringRef mangle(StringRef sym);
+
+ // Windows specific -- "main" is not the only main function in Windows.
+ // You can choose one from these four -- {w,}{WinMain,main}.
+ // There are four different entry point functions for them,
+ // {w,}{WinMain,main}CRTStartup, respectively. The linker needs to
+ // choose the right one depending on which "main" function is defined.
+ // This function looks up the symbol table and resolve corresponding
+ // entry point name.
+ StringRef findDefaultEntry();
+ WindowsSubsystem inferSubsystem();
// Build a set of COFF objects representing the combined contents of
// BitcodeFiles and add them to the symbol table. Called after all files are
@@ -152,6 +168,7 @@ class SymbolTable {
/// Same as insert(Name), but also sets isUsedInRegularObj.
std::pair<Symbol *, bool> insert(StringRef name, InputFile *f);
+ bool findUnderscoreMangle(StringRef sym);
std::vector<Symbol *> getSymsWithPrefix(StringRef prefix);
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> symMap;
More information about the llvm-commits
mailing list