[lld] [LLD][COFF] Check both mangled and demangled symbols before adding a lazy archive symbol to the symbol table on ARM64EC (PR #113284)
Jacek Caban via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 23 03:50:01 PDT 2024
https://github.com/cjacek updated https://github.com/llvm/llvm-project/pull/113284
>From 14be0061f068dfc0602f1bcde188323db0a50f82 Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek at codeweavers.com>
Date: Mon, 21 Oct 2024 23:31:45 +0200
Subject: [PATCH] [LLD][COFF] Check both mangled and demangled symbols before
adding a lazy archive symbol to the symbol table on ARM64EC
On ARM64EC, a function symbol may appear in both mangled and demangled forms:
- ARM64EC archives contain only the mangled name, while the demangled symbol is
defined by the object file as an alias.
- x86_64 archives contain only the demangled name (the mangled name is usually
defined by an object referencing the symbol as an alias to a guest exit thunk).
- ARM64EC import files contain both the mangled and demangled names for thunks.
If more than one archive defines the same function, this could lead to
different libraries being used for the same function depending on how they
are referenced. Avoid this by checking if the paired symbol is already defined
before adding a symbol to the table.
---
lld/COFF/SymbolTable.cpp | 42 ++++++++++++++++++++++++++++++++++
lld/test/COFF/arm64ec-lib.test | 20 ++++++++++++++++
2 files changed, 62 insertions(+)
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 435b3bf4dbab80..aeeebbb4e332ab 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -18,6 +18,7 @@
#include "lld/Common/Timer.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Parallel.h"
@@ -631,8 +632,47 @@ Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
return s;
}
+// On ARM64EC, a function symbol may appear in both mangled and demangled forms:
+// - ARM64EC archives contain only the mangled name, while the demangled symbol
+// is defined by the object file as an alias.
+// - x86_64 archives contain only the demangled name (the mangled name is
+// usually defined by an object referencing the symbol as an alias to a guest
+// exit thunk).
+// - ARM64EC import files contain both the mangled and demangled names for
+// thunks.
+// If more than one archive defines the same function, this could lead
+// to different libraries being used for the same function depending on how they
+// are referenced. Avoid this by checking if the paired symbol is already
+// defined before adding a symbol to the table.
+template <typename T>
+bool checkLazyECPair(SymbolTable *symtab, StringRef name, InputFile *f) {
+ if (name.starts_with("__imp_"))
+ return true;
+ std::string pairName;
+ if (std::optional<std::string> mangledName =
+ getArm64ECMangledFunctionName(name))
+ pairName = std::move(*mangledName);
+ else
+ pairName = *getArm64ECDemangledFunctionName(name);
+
+ Symbol *sym = symtab->find(pairName);
+ if (!sym)
+ return true;
+ if (sym->pendingArchiveLoad)
+ return false;
+ if (auto u = dyn_cast<Undefined>(sym))
+ return !u->weakAlias || u->isAntiDep;
+ // If the symbol is lazy, allow it only if it originates from the same
+ // archive.
+ auto lazy = dyn_cast<T>(sym);
+ return lazy && lazy->file == f;
+}
+
void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
StringRef name = sym.getName();
+ if (isArm64EC(ctx.config.machine) &&
+ !checkLazyECPair<LazyArchive>(this, name, f))
+ return;
auto [s, wasInserted] = insert(name);
if (wasInserted) {
replaceSymbol<LazyArchive>(s, f, sym);
@@ -648,6 +688,8 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
assert(f->lazy);
+ if (isArm64EC(ctx.config.machine) && !checkLazyECPair<LazyObject>(this, n, f))
+ return;
auto [s, wasInserted] = insert(n, f);
if (wasInserted) {
replaceSymbol<LazyObject>(s, f, n);
diff --git a/lld/test/COFF/arm64ec-lib.test b/lld/test/COFF/arm64ec-lib.test
index 617728dac0ab25..075854f62d5d42 100644
--- a/lld/test/COFF/arm64ec-lib.test
+++ b/lld/test/COFF/arm64ec-lib.test
@@ -17,6 +17,7 @@ RUN: llvm-lib -machine:arm64ec -out:sym-arm64ec.lib sym-arm64ec.obj nsym-aarch64
RUN: llvm-lib -machine:amd64 -out:sym-x86_64.lib sym-x86_64.obj
RUN: llvm-lib -machine:arm64ec -out:func.lib func.obj
RUN: llvm-lib -machine:arm64ec -out:func-x86_64.lib func-x86_64.obj
+RUN: llvm-lib -machine:arm64ec -out:func-imp.lib -def:func.def
Verify that a symbol can be referenced from ECSYMBOLS.
RUN: lld-link -machine:arm64ec -dll -noentry -out:test.dll symref-arm64ec.obj sym-arm64ec.lib loadconfig-arm64ec.obj
@@ -57,6 +58,15 @@ RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-2.dll func.lib ref-t
RUN: llvm-objdump -d ref-thunk-2.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test ref-thunk-2.dll | FileCheck -check-prefix=TESTSEC %s
+Pass multiple libraries containing `func` with different manglings and ensure they don't conflict with each other.
+RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-3.dll func.lib loadconfig-arm64ec.obj func-x86_64.lib func-imp.lib ref-thunk.obj
+RUN: llvm-objdump -d ref-thunk-3.dll | FileCheck -check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test ref-thunk-3.dll | FileCheck -check-prefix=TESTSEC %s
+
+RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-4.dll ref-thunk.obj func.lib loadconfig-arm64ec.obj func-x86_64.lib func-imp.lib
+RUN: llvm-objdump -d ref-thunk-4.dll | FileCheck -check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test ref-thunk-4.dll | FileCheck -check-prefix=TESTSEC %s
+
Test linking against an x86_64 library (which uses a demangled function name).
RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-x86-1.dll ref-thunk.obj func-x86_64.lib loadconfig-arm64ec.obj
RUN: llvm-objdump -d ref-x86-1.dll | FileCheck -check-prefix=DISASM-X86 %s
@@ -80,6 +90,11 @@ RUN: lld-link -machine:arm64ec -dll -noentry -out:start-lib-1.dll ref-thunk.obj
RUN: llvm-objdump -d start-lib-1.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test start-lib-1.dll | FileCheck -check-prefix=TESTSEC %s
+RUN: lld-link -machine:arm64ec -dll -noentry -out:start-lib-2.dll ref-thunk.obj -start-lib func.obj -end-lib loadconfig-arm64ec.obj \
+RUN: -start-lib func-x86_64.obj -end-lib func-imp.lib
+RUN: llvm-objdump -d start-lib-2.dll | FileCheck -check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test start-lib-2.dll | FileCheck -check-prefix=TESTSEC %s
+
#--- symref.s
.data
.rva sym
@@ -135,3 +150,8 @@ thunksym:
.globl func
func:
ret
+
+#--- func.def
+LIBRARY func.dll
+EXPORTS
+ func
More information about the llvm-commits
mailing list