[lld] [LLD][COFF] Check both mangled and demangled symbols before adding a lazy archive symbol to the symbol table on ARM64EC (PR #113284)

Jacek Caban via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 22 02:09:12 PDT 2024


https://github.com/cjacek created https://github.com/llvm/llvm-project/pull/113284

On ARM64EC, a function symbol may appear in both mangled and demangled forms:
- ARM64EC archives contain only the mangled name, while the demangled symbol is defined by the object file as an alias.
- x86_64 archives contain only the demangled name (the mangled name is usually defined by an object referencing the symbol as an alias to a guess exit thunk).
- ARM64EC import files contain both the mangled and demangled names for thunks.

If more than one archive defines the same function, this could lead to different libraries being used for the same function depending on how they are referenced. Avoid this by checking if the paired symbol is already defined before adding a symbol to the table.

>From ba1bc2975a630cf09b000b835ef925fc2b8bef6f Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek at codeweavers.com>
Date: Mon, 25 Sep 2023 18:44:29 +0200
Subject: [PATCH 1/2] [LLD][COFF] Allow overriding EC alias symbols with lazy
 archive symbols

On ARM64EC, external functions do not emit undefined symbols. Instead,
they emit a pair of weak-dependency aliases: `func` to `#func`, and
`#func` to the func guess exit thunk. This change allows such aliases
to be overridden by lazy archive symbols, similar to how we handle
undefined symbols.
---
 lld/COFF/InputFiles.cpp        | 35 +++++++++++---
 lld/COFF/InputFiles.h          |  2 +-
 lld/COFF/SymbolTable.cpp       | 10 ++--
 lld/COFF/SymbolTable.h         |  2 +-
 lld/COFF/Symbols.h             |  4 ++
 lld/test/COFF/arm64ec-lib.test | 85 ++++++++++++++++++++++++++++++++++
 6 files changed, 126 insertions(+), 12 deletions(-)

diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 292c3bfc1eaa9d..fdbc1de4beaf32 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -455,11 +455,34 @@ void ObjFile::initializeSymbols() {
     COFFSymbolRef coffSym = check(coffObj->getSymbol(i));
     bool prevailingComdat;
     if (coffSym.isUndefined()) {
-      symbols[i] = createUndefined(coffSym);
+      symbols[i] = createUndefined(coffSym, false);
     } else if (coffSym.isWeakExternal()) {
-      symbols[i] = createUndefined(coffSym);
-      weakAliases.emplace_back(symbols[i],
-                               coffSym.getAux<coff_aux_weak_external>());
+      auto aux = coffSym.getAux<coff_aux_weak_external>();
+      bool overrideLazy = true;
+
+      // On ARM64EC, external functions don't emit undefined symbols. Instead,
+      // they emit a pair of weak-dependency aliases: func to #func and
+      // #func to the func guess exit thunk. Allow such aliases to be overridden
+      // by lazy archive symbols, just as we would for undefined symbols.
+      if (isArm64EC(getMachineType()) &&
+          aux->Characteristics == IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY) {
+        COFFSymbolRef targetSym = check(coffObj->getSymbol(aux->TagIndex));
+        if (!targetSym.isAnyUndefined()) {
+          // If the target is defined, it may be either a guess exit thunk or
+          // the actual implementation. If it's the latter, consider the alias
+          // to be part of the implementation and override potential lazy archive
+          // symbols.
+          StringRef targetName = check(coffObj->getSymbolName(targetSym));
+          StringRef name = check(coffObj->getSymbolName(coffSym));
+          std::optional<std::string> mangledName =
+              getArm64ECMangledFunctionName(name);
+          overrideLazy = mangledName == targetName;
+        } else {
+          overrideLazy = false;
+        }
+      }
+      symbols[i] = createUndefined(coffSym, overrideLazy);
+      weakAliases.emplace_back(symbols[i], aux);
     } else if (std::optional<Symbol *> optSym =
                    createDefined(coffSym, comdatDefs, prevailingComdat)) {
       symbols[i] = *optSym;
@@ -508,9 +531,9 @@ void ObjFile::initializeSymbols() {
   decltype(sparseChunks)().swap(sparseChunks);
 }
 
-Symbol *ObjFile::createUndefined(COFFSymbolRef sym) {
+Symbol *ObjFile::createUndefined(COFFSymbolRef sym, bool overrideLazy) {
   StringRef name = check(coffObj->getSymbolName(sym));
-  return ctx.symtab.addUndefined(name, this, sym.isWeakExternal());
+  return ctx.symtab.addUndefined(name, this, overrideLazy);
 }
 
 static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj,
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index a20b097cbe04af..77f7e298166eec 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -272,7 +272,7 @@ class ObjFile : public InputFile {
                     &comdatDefs,
                 bool &prevailingComdat);
   Symbol *createRegular(COFFSymbolRef sym);
-  Symbol *createUndefined(COFFSymbolRef sym);
+  Symbol *createUndefined(COFFSymbolRef sym, bool overrideLazy);
 
   std::unique_ptr<COFFObjectFile> coffObj;
 
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 230ae74dfb21d0..435b3bf4dbab80 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -620,9 +620,9 @@ void SymbolTable::initializeECThunks() {
 }
 
 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
-                                  bool isWeakAlias) {
+                                  bool overrideLazy) {
   auto [s, wasInserted] = insert(name, f);
-  if (wasInserted || (s->isLazy() && isWeakAlias)) {
+  if (wasInserted || (s->isLazy() && overrideLazy)) {
     replaceSymbol<Undefined>(s, name);
     return s;
   }
@@ -639,7 +639,8 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
     return;
   }
   auto *u = dyn_cast<Undefined>(s);
-  if (!u || u->weakAlias || s->pendingArchiveLoad)
+  if (!u || (u->weakAlias && !u->isECAlias(ctx.config.machine)) ||
+      s->pendingArchiveLoad)
     return;
   s->pendingArchiveLoad = true;
   f->addMember(sym);
@@ -653,7 +654,8 @@ void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
     return;
   }
   auto *u = dyn_cast<Undefined>(s);
-  if (!u || u->weakAlias || s->pendingArchiveLoad)
+  if (!u || (u->weakAlias && !u->isECAlias(ctx.config.machine)) ||
+      s->pendingArchiveLoad)
     return;
   s->pendingArchiveLoad = true;
   f->lazy = false;
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index dab03afde3f987..1d9e908b8b9918 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -91,7 +91,7 @@ class SymbolTable {
   Symbol *addSynthetic(StringRef n, Chunk *c);
   Symbol *addAbsolute(StringRef n, uint64_t va);
 
-  Symbol *addUndefined(StringRef name, InputFile *f, bool isWeakAlias);
+  Symbol *addUndefined(StringRef name, InputFile *f, bool overrideLazy);
   void addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym);
   void addLazyObject(InputFile *f, StringRef n);
   void addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym, StringRef n);
diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h
index ff84ff8ad7b28b..203a542466c68e 100644
--- a/lld/COFF/Symbols.h
+++ b/lld/COFF/Symbols.h
@@ -353,6 +353,10 @@ class Undefined : public Symbol {
     isAntiDep = antiDep;
   }
 
+  bool isECAlias(MachineTypes machine) const {
+    return weakAlias && isAntiDep && isArm64EC(machine);
+  }
+
   // If this symbol is external weak, replace this object with aliased symbol.
   bool resolveWeakAlias();
 };
diff --git a/lld/test/COFF/arm64ec-lib.test b/lld/test/COFF/arm64ec-lib.test
index a26c098547fdbe..78b9f326aab893 100644
--- a/lld/test/COFF/arm64ec-lib.test
+++ b/lld/test/COFF/arm64ec-lib.test
@@ -7,10 +7,16 @@ RUN: llvm-mc -filetype=obj -triple=aarch64-windows nsymref.s -o nsymref-aarch64.
 RUN: llvm-mc -filetype=obj -triple=arm64ec-windows sym.s -o sym-arm64ec.obj
 RUN: llvm-mc -filetype=obj -triple=x86_64-windows sym.s -o sym-x86_64.obj
 RUN: llvm-mc -filetype=obj -triple=aarch64-windows nsym.s -o nsym-aarch64.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows ref-alias.s -o ref-alias.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows ref-thunk.s -o ref-thunk.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows func.s -o func.obj
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows func-x86_64.s -o func-x86_64.obj
 RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
 
 RUN: llvm-lib -machine:arm64ec -out:sym-arm64ec.lib sym-arm64ec.obj nsym-aarch64.obj
 RUN: llvm-lib -machine:amd64 -out:sym-x86_64.lib sym-x86_64.obj
+RUN: llvm-lib -machine:arm64ec -out:func.lib func.obj
+RUN: llvm-lib -machine:arm64ec -out:func-x86_64.lib func-x86_64.obj
 
 Verify that a symbol can be referenced from ECSYMBOLS.
 RUN: lld-link -machine:arm64ec -dll -noentry -out:test.dll symref-arm64ec.obj sym-arm64ec.lib loadconfig-arm64ec.obj
@@ -26,6 +32,49 @@ RUN: not lld-link -machine:arm64ec -dll -noentry -out:test-err.dll nsymref-arm64
 RUN:              FileCheck --check-prefix=ERR %s
 ERR: error: undefined symbol: nsym
 
+Verify that a library symbol can be referenced, even if its name conflicts with an anti-dependency alias.
+RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-alias-1.dll ref-alias.obj func.lib loadconfig-arm64ec.obj
+RUN: llvm-objdump -d ref-alias-1.dll | FileCheck -check-prefix=DISASM %s
+DISASM:      0000000180001000 <.text>:
+DISASM-NEXT: 180001000: d65f03c0     ret
+DISASM-EMPTY:
+
+RUN: llvm-readobj --hex-dump=.test ref-alias-1.dll | FileCheck -check-prefix=TESTSEC %s
+TESTSEC: 0x180004000 00100000
+
+The same test, but with a different input order.
+RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-alias-2.dll func.lib ref-alias.obj loadconfig-arm64ec.obj
+RUN: llvm-objdump -d ref-alias-2.dll | FileCheck -check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test ref-alias-2.dll | FileCheck -check-prefix=TESTSEC %s
+
+Verify that when an anti-dependency to a guess exit thunk is present, it is overridden by an archive symbol.
+RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-1.dll ref-thunk.obj func.lib loadconfig-arm64ec.obj
+RUN: llvm-objdump -d ref-thunk-1.dll | FileCheck -check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test ref-thunk-1.dll | FileCheck -check-prefix=TESTSEC %s
+
+The same test, but with a different input order.
+RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-2.dll func.lib ref-thunk.obj loadconfig-arm64ec.obj
+RUN: llvm-objdump -d ref-thunk-2.dll | FileCheck -check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test ref-thunk-2.dll | FileCheck -check-prefix=TESTSEC %s
+
+Test linking against an x86_64 library (which uses a demangled function name).
+RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-x86-1.dll ref-thunk.obj func-x86_64.lib loadconfig-arm64ec.obj
+RUN: llvm-objdump -d ref-x86-1.dll | FileCheck -check-prefix=DISASM-X86 %s
+RUN: llvm-readobj --hex-dump=.test ref-x86-1.dll | FileCheck -check-prefix=TESTSEC %s
+
+DISASM-X86:      0000000180001000 <.text>:
+DISASM-X86-NEXT: 180001000: c3                           retq
+
+The same test, but with a different input order.
+RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-x86-2.dll func-x86_64.lib ref-thunk.obj loadconfig-arm64ec.obj
+RUN: llvm-objdump -d ref-x86-2.dll | FileCheck -check-prefix=DISASM-X86 %s
+RUN: llvm-readobj --hex-dump=.test ref-x86-2.dll | FileCheck -check-prefix=TESTSEC %s
+
+A similar test using -start-lib for linking.
+RUN: lld-link -machine:arm64ec -dll -noentry -out:start-lib-1.dll ref-thunk.obj -start-lib func.obj -end-lib loadconfig-arm64ec.obj
+RUN: llvm-objdump -d start-lib-1.dll | FileCheck -check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test start-lib-1.dll | FileCheck -check-prefix=TESTSEC %s
+
 #--- symref.s
     .data
     .rva sym
@@ -45,3 +94,39 @@ sym:
      .globl nsym
 nsym:
      .word 0
+
+#--- ref-alias.s
+    .weak_anti_dep func
+.set func,"#func"
+
+    .section .test, "r"
+    .rva func
+
+#--- ref-thunk.s
+    .weak_anti_dep func
+.set func, "#func"
+    .weak_anti_dep "#func"
+.set "#func", thunksym
+
+    .section .test, "r"
+    .rva func
+
+    .section .thnk,"xr",discard,thunksym
+thunksym:
+    mov w0, #2
+    ret
+
+#--- func.s
+    .text
+    .globl "#func"
+"#func":
+    ret
+
+    .weak_anti_dep func
+.set func,"#func"
+
+#--- func-x86_64.s
+    .text
+    .globl func
+func:
+    ret

>From f7c45ee69a89336fa14182c4e021a6a19da56de8 Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek at codeweavers.com>
Date: Mon, 21 Oct 2024 23:31:45 +0200
Subject: [PATCH 2/2] [LLD][COFF] Check both mangled and demangled symbols
 before adding a lazy archive symbol to the symbol table on ARM64EC

On ARM64EC, a function symbol may appear in both mangled and demangled forms:
- ARM64EC archives contain only the mangled name, while the demangled symbol is
  defined by the object file as an alias.
- x86_64 archives contain only the demangled name (the mangled name is usually
  defined by an object referencing the symbol as an alias to a guess exit thunk).
- ARM64EC import files contain both the mangled and demangled names for thunks.
If more than one archive defines the same function, this could lead to
different libraries being used for the same function depending on how they
are referenced. Avoid this by checking if the paired symbol is already defined
before adding a symbol to the table.
---
 lld/COFF/SymbolTable.cpp       | 39 ++++++++++++++++++++++++++++++++++
 lld/test/COFF/arm64ec-lib.test | 20 +++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 435b3bf4dbab80..c93f74c6924192 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -18,6 +18,7 @@
 #include "lld/Common/Timer.h"
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/LTO/LTO.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Parallel.h"
@@ -631,8 +632,44 @@ Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
   return s;
 }
 
+// On ARM64EC, a function symbol may appear in both mangled and demangled forms:
+// - ARM64EC archives contain only the mangled name, while the demangled symbol is
+//   defined by the object file as an alias.
+// - x86_64 archives contain only the demangled name (the mangled name is usually
+//   defined by an object referencing the symbol as an alias to a guess exit thunk).
+// - ARM64EC import files contain both the mangled and demangled names for thunks.
+// If more than one archive defines the same function, this could lead to
+// different libraries being used for the same function depending on how they
+// are referenced. Avoid this by checking if the paired symbol is already defined
+// before adding a symbol to the table.
+template <typename T>
+bool checkLazyECPair(SymbolTable *symtab, StringRef name, InputFile *f) {
+  if (name.starts_with("__imp_"))
+    return true;
+  std::string pairName;
+  if (std::optional<std::string> mangledName =
+          getArm64ECMangledFunctionName(name))
+    pairName = std::move(*mangledName);
+  else
+    pairName = *getArm64ECDemangledFunctionName(name);
+
+  Symbol *sym = symtab->find(pairName);
+  if (!sym)
+    return true;
+  if (sym->pendingArchiveLoad)
+    return false;
+  if (auto u = dyn_cast<Undefined>(sym))
+    return !u->weakAlias || u->isAntiDep;
+  // If the symbol is lazy, allow it only if it originates from the same archive.
+  auto lazy = dyn_cast<T>(sym);
+  return lazy && lazy->file == f;
+}
+
 void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
   StringRef name = sym.getName();
+  if (isArm64EC(ctx.config.machine) &&
+      !checkLazyECPair<LazyArchive>(this, name, f))
+    return;
   auto [s, wasInserted] = insert(name);
   if (wasInserted) {
     replaceSymbol<LazyArchive>(s, f, sym);
@@ -648,6 +685,8 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
 
 void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
   assert(f->lazy);
+  if (isArm64EC(ctx.config.machine) && !checkLazyECPair<LazyObject>(this, n, f))
+    return;
   auto [s, wasInserted] = insert(n, f);
   if (wasInserted) {
     replaceSymbol<LazyObject>(s, f, n);
diff --git a/lld/test/COFF/arm64ec-lib.test b/lld/test/COFF/arm64ec-lib.test
index 78b9f326aab893..c63c59b46e5740 100644
--- a/lld/test/COFF/arm64ec-lib.test
+++ b/lld/test/COFF/arm64ec-lib.test
@@ -17,6 +17,7 @@ RUN: llvm-lib -machine:arm64ec -out:sym-arm64ec.lib sym-arm64ec.obj nsym-aarch64
 RUN: llvm-lib -machine:amd64 -out:sym-x86_64.lib sym-x86_64.obj
 RUN: llvm-lib -machine:arm64ec -out:func.lib func.obj
 RUN: llvm-lib -machine:arm64ec -out:func-x86_64.lib func-x86_64.obj
+RUN: llvm-lib -machine:arm64ec -out:func-imp.lib -def:func.def
 
 Verify that a symbol can be referenced from ECSYMBOLS.
 RUN: lld-link -machine:arm64ec -dll -noentry -out:test.dll symref-arm64ec.obj sym-arm64ec.lib loadconfig-arm64ec.obj
@@ -57,6 +58,15 @@ RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-2.dll func.lib ref-t
 RUN: llvm-objdump -d ref-thunk-2.dll | FileCheck -check-prefix=DISASM %s
 RUN: llvm-readobj --hex-dump=.test ref-thunk-2.dll | FileCheck -check-prefix=TESTSEC %s
 
+Pass multiple libraries containing `func` with different manglings and ensure they don't conflict with each other.
+RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-3.dll func.lib loadconfig-arm64ec.obj func-x86_64.lib func-imp.lib ref-thunk.obj
+RUN: llvm-objdump -d ref-thunk-3.dll | FileCheck -check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test ref-thunk-3.dll | FileCheck -check-prefix=TESTSEC %s
+
+RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-4.dll ref-thunk.obj func.lib loadconfig-arm64ec.obj func-x86_64.lib func-imp.lib
+RUN: llvm-objdump -d ref-thunk-4.dll | FileCheck -check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test ref-thunk-4.dll | FileCheck -check-prefix=TESTSEC %s
+
 Test linking against an x86_64 library (which uses a demangled function name).
 RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-x86-1.dll ref-thunk.obj func-x86_64.lib loadconfig-arm64ec.obj
 RUN: llvm-objdump -d ref-x86-1.dll | FileCheck -check-prefix=DISASM-X86 %s
@@ -75,6 +85,11 @@ RUN: lld-link -machine:arm64ec -dll -noentry -out:start-lib-1.dll ref-thunk.obj
 RUN: llvm-objdump -d start-lib-1.dll | FileCheck -check-prefix=DISASM %s
 RUN: llvm-readobj --hex-dump=.test start-lib-1.dll | FileCheck -check-prefix=TESTSEC %s
 
+RUN: lld-link -machine:arm64ec -dll -noentry -out:start-lib-2.dll ref-thunk.obj -start-lib func.obj -end-lib loadconfig-arm64ec.obj \
+RUN:          -start-lib func-x86_64.obj -end-lib func-imp.lib
+RUN: llvm-objdump -d ref-thunk-3.dll | FileCheck -check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test ref-thunk-3.dll | FileCheck -check-prefix=TESTSEC %s
+
 #--- symref.s
     .data
     .rva sym
@@ -130,3 +145,8 @@ thunksym:
     .globl func
 func:
     ret
+
+#--- func.def
+LIBRARY func.dll
+EXPORTS
+        func
\ No newline at end of file



More information about the llvm-commits mailing list