[lld] [LLD][COFF] Support anti-dependency symbols (PR #112542)

Jacek Caban via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 16 06:26:15 PDT 2024


https://github.com/cjacek created https://github.com/llvm/llvm-project/pull/112542

Co-authored-by: Billy Laws

Anti-dependency symbols are allowed to be duplicated, with the first definition taking precedence. If a regular weak alias is present, it is preferred over an anti-dependency definition. Chaining anti-dependencies is not allowed.

>From a71855f3157d8804eacf10f2bea351fb0b01f312 Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek at codeweavers.com>
Date: Sun, 5 May 2024 01:44:25 +0200
Subject: [PATCH] [LLD][COFF] Support anti-dependency symbols

Co-authored-by: Billy Laws <blaws05 at gmail.com>

Anti-dependency symbols are allowed to be duplicated, with the first definition taking
precedence. If a regular weak alias is present, it is preferred over an anti-dependency
definition. Chaining anti-dependencies is not allowed.
---
 lld/COFF/Driver.cpp                   |  4 +-
 lld/COFF/InputFiles.cpp               | 39 +++++++++------
 lld/COFF/SymbolTable.cpp              |  2 +-
 lld/COFF/Symbols.cpp                  |  5 ++
 lld/COFF/Symbols.h                    | 10 +++-
 lld/test/COFF/weak-antidep-chain.test | 68 +++++++++++++++++++++++++++
 lld/test/COFF/weak-antidep.test       | 54 +++++++++++++++++++++
 7 files changed, 163 insertions(+), 19 deletions(-)
 create mode 100644 lld/test/COFF/weak-antidep-chain.test
 create mode 100644 lld/test/COFF/weak-antidep.test

diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 12e1ae62811270..e7f768789271fa 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -737,7 +737,7 @@ StringRef LinkerDriver::mangleMaybe(Symbol *s) {
   // If we find a similar mangled symbol, make this an alias to it and return
   // its name.
   log(unmangled->getName() + " aliased to " + mangled->getName());
-  unmangled->weakAlias = ctx.symtab.addUndefined(mangled->getName());
+  unmangled->setWeakAlias(ctx.symtab.addUndefined(mangled->getName()));
   return mangled->getName();
 }
 
@@ -2520,7 +2520,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
           continue;
         if (auto *u = dyn_cast<Undefined>(sym))
           if (!u->weakAlias)
-            u->weakAlias = ctx.symtab.addUndefined(to);
+            u->setWeakAlias(ctx.symtab.addUndefined(to));
       }
 
       // If any inputs are bitcode files, the LTO code generator may create
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index b19275abebc3ac..292c3bfc1eaa9d 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -74,19 +74,25 @@ std::string lld::toString(const coff::InputFile *file) {
 /// If Source is Undefined and has no weak alias set, makes it a weak
 /// alias to Target.
 static void checkAndSetWeakAlias(COFFLinkerContext &ctx, InputFile *f,
-                                 Symbol *source, Symbol *target) {
+                                 Symbol *source, Symbol *target,
+                                 bool isAntiDep) {
   if (auto *u = dyn_cast<Undefined>(source)) {
     if (u->weakAlias && u->weakAlias != target) {
-      // Weak aliases as produced by GCC are named in the form
-      // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
-      // of another symbol emitted near the weak symbol.
-      // Just use the definition from the first object file that defined
-      // this weak symbol.
-      if (ctx.config.allowDuplicateWeak)
+      // Ignore duplicated anti-dependency symbols.
+      if (isAntiDep)
         return;
-      ctx.symtab.reportDuplicate(source, f);
+      if (!u->isAntiDep) {
+        // Weak aliases as produced by GCC are named in the form
+        // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
+        // of another symbol emitted near the weak symbol.
+        // Just use the definition from the first object file that defined
+        // this weak symbol.
+        if (ctx.config.allowDuplicateWeak)
+          return;
+        ctx.symtab.reportDuplicate(source, f);
+      }
     }
-    u->weakAlias = target;
+    u->setWeakAlias(target, isAntiDep);
   }
 }
 
@@ -436,7 +442,8 @@ void ObjFile::initializeSymbols() {
   uint32_t numSymbols = coffObj->getNumberOfSymbols();
   symbols.resize(numSymbols);
 
-  SmallVector<std::pair<Symbol *, uint32_t>, 8> weakAliases;
+  SmallVector<std::pair<Symbol *, const coff_aux_weak_external *>, 8>
+      weakAliases;
   std::vector<uint32_t> pendingIndexes;
   pendingIndexes.reserve(numSymbols);
 
@@ -451,8 +458,8 @@ void ObjFile::initializeSymbols() {
       symbols[i] = createUndefined(coffSym);
     } else if (coffSym.isWeakExternal()) {
       symbols[i] = createUndefined(coffSym);
-      uint32_t tagIndex = coffSym.getAux<coff_aux_weak_external>()->TagIndex;
-      weakAliases.emplace_back(symbols[i], tagIndex);
+      weakAliases.emplace_back(symbols[i],
+                               coffSym.getAux<coff_aux_weak_external>());
     } else if (std::optional<Symbol *> optSym =
                    createDefined(coffSym, comdatDefs, prevailingComdat)) {
       symbols[i] = *optSym;
@@ -491,8 +498,10 @@ void ObjFile::initializeSymbols() {
 
   for (auto &kv : weakAliases) {
     Symbol *sym = kv.first;
-    uint32_t idx = kv.second;
-    checkAndSetWeakAlias(ctx, this, sym, symbols[idx]);
+    const coff_aux_weak_external *aux = kv.second;
+    checkAndSetWeakAlias(ctx, this, sym, symbols[aux->TagIndex],
+                         aux->Characteristics ==
+                             IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY);
   }
 
   // Free the memory used by sparseChunks now that symbol loading is finished.
@@ -1202,7 +1211,7 @@ void BitcodeFile::parse() {
       sym = ctx.symtab.addUndefined(symName, this, true);
       std::string fallback = std::string(objSym.getCOFFWeakExternalFallback());
       Symbol *alias = ctx.symtab.addUndefined(saver.save(fallback));
-      checkAndSetWeakAlias(ctx, this, sym, alias);
+      checkAndSetWeakAlias(ctx, this, sym, alias, false);
     } else if (comdatIndex != -1) {
       if (symName == obj->getComdatTable()[comdatIndex].first) {
         sym = comdat[comdatIndex].first;
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index fa09ea64babb28..230ae74dfb21d0 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -315,7 +315,7 @@ void SymbolTable::loadMinGWSymbols() {
             warn("Resolving " + origName + " by linking to " + newName);
           else
             log("Resolving " + origName + " by linking to " + newName);
-          undef->weakAlias = l;
+          undef->setWeakAlias(l);
           continue;
         }
       }
diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp
index 89f2da02bdcf42..f2fa2392ecbbcd 100644
--- a/lld/COFF/Symbols.cpp
+++ b/lld/COFF/Symbols.cpp
@@ -116,6 +116,9 @@ Symbol *Undefined::getWeakAlias() {
   // A weak alias may be a weak alias to another symbol, so check recursively.
   DenseSet<Symbol *> weakChain;
   for (Symbol *a = weakAlias; a; a = cast<Undefined>(a)->weakAlias) {
+    // Anti-dependency symbols can't be chained.
+    if (a->isAntiDep)
+      break;
     if (!isa<Undefined>(a))
       return a;
     if (!weakChain.insert(a).second)
@@ -135,6 +138,7 @@ bool Undefined::resolveWeakAlias() {
   // Symbols. For that reason we need to check which type of symbol we
   // are dealing with and copy the correct number of bytes.
   StringRef name = getName();
+  bool wasAntiDep = isAntiDep;
   if (isa<DefinedRegular>(d))
     memcpy(this, d, sizeof(DefinedRegular));
   else if (isa<DefinedAbsolute>(d))
@@ -144,6 +148,7 @@ bool Undefined::resolveWeakAlias() {
 
   nameData = name.data();
   nameSize = name.size();
+  isAntiDep = wasAntiDep;
   return true;
 }
 
diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h
index a898ebf05fd809..ff84ff8ad7b28b 100644
--- a/lld/COFF/Symbols.h
+++ b/lld/COFF/Symbols.h
@@ -100,7 +100,7 @@ class Symbol {
       : symbolKind(k), isExternal(true), isCOMDAT(false),
         writtenToSymtab(false), isUsedInRegularObj(false),
         pendingArchiveLoad(false), isGCRoot(false), isRuntimePseudoReloc(false),
-        deferUndefined(false), canInline(true), isWeak(false),
+        deferUndefined(false), canInline(true), isWeak(false), isAntiDep(false),
         nameSize(n.size()), nameData(n.empty() ? nullptr : n.data()) {
     assert((!n.empty() || k <= LastDefinedCOFFKind) &&
            "If the name is empty, the Symbol must be a DefinedCOFF.");
@@ -145,6 +145,9 @@ class Symbol {
   // managing weak symbol overrides.
   unsigned isWeak : 1;
 
+  // True if the symbol is an anti-dependency.
+  unsigned isAntiDep : 1;
+
 protected:
   // Symbol name length. Assume symbol lengths fit in a 32-bit integer.
   uint32_t nameSize;
@@ -345,6 +348,11 @@ class Undefined : public Symbol {
     return dyn_cast_or_null<Defined>(getWeakAlias());
   }
 
+  void setWeakAlias(Symbol *sym, bool antiDep = false) {
+    weakAlias = sym;
+    isAntiDep = antiDep;
+  }
+
   // If this symbol is external weak, replace this object with aliased symbol.
   bool resolveWeakAlias();
 };
diff --git a/lld/test/COFF/weak-antidep-chain.test b/lld/test/COFF/weak-antidep-chain.test
new file mode 100644
index 00000000000000..f6e32c5139a41e
--- /dev/null
+++ b/lld/test/COFF/weak-antidep-chain.test
@@ -0,0 +1,68 @@
+REQUIRES: x86
+RUN: split-file %s %t.dir && cd %t.dir
+
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows chain-bad.s -o chain-bad.obj
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows chain-bad2.s -o chain-bad2.obj
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows globals-bad.s -o globals-bad.obj
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows chain-good.s -o chain-good.obj
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows chain-good2.s -o chain-good2.obj
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows globals-good.s -o globals-good.obj
+
+Temporary anti-dependency chains are allowed as long as they are broken by non-alias symbols.
+
+RUN: lld-link -machine:amd64 -dll -noentry -out:test.dll chain-good.obj globals-good.obj
+RUN: lld-link -machine:amd64 -dll -noentry -out:test.dll chain-good2.obj globals-good.obj
+
+Chaining of anti-dependency symbols is not allowed.
+
+RUN: not lld-link -machine:amd64 -dll -noentry -out:test.dll chain-bad.obj globals-bad.obj 2>&1 \
+RUN:              | FileCheck -check-prefix=ANTIDEP %s
+RUN: not lld-link -machine:amd64 -dll -noentry -out:test.dll chain-bad2.obj globals-bad.obj 2>&1 \
+RUN:              | FileCheck -check-prefix=ANTIDEP %s
+
+ANTIDEP:      lld-link: error: undefined symbol: sym
+ANTIDEP-NEXT: >>> referenced by chain-bad
+
+#--- chain-bad.s
+    .weak_anti_dep sym
+.set sym,sym2
+    .weak_anti_dep sym2
+.set sym2,sym3
+
+#--- chain-bad2.s
+    .weak_anti_dep sym2
+.set sym2,sym3
+    .weak sym
+.set sym,sym2
+
+#--- globals-bad.s
+    .section .test,"r"
+    .global sym3
+.set sym3,3
+
+#--- chain-good.s
+    .weak_anti_dep sym
+.set sym,sym2
+    .weak_anti_dep sym2
+.set sym2,sym3
+    .weak_anti_dep sym3
+.set sym3,sym4
+    .weak_anti_dep sym4
+
+#--- chain-good2.s
+    .weak_anti_dep sym
+.set sym,sym2
+    .weak_anti_dep sym2
+.set sym2,sym3
+    .weak_anti_dep sym3
+.set sym3,weak_sym
+    .weak weak_sym
+.set weak_sym,sym4
+    .weak_anti_dep sym4
+
+#--- globals-good.s
+    .section .test,"r"
+    .global sym2
+.set sym2,2
+    .global sym4
+.set sym4,4
diff --git a/lld/test/COFF/weak-antidep.test b/lld/test/COFF/weak-antidep.test
new file mode 100644
index 00000000000000..691aa8f184337b
--- /dev/null
+++ b/lld/test/COFF/weak-antidep.test
@@ -0,0 +1,54 @@
+REQUIRES: x86
+RUN: split-file %s %t.dir && cd %t.dir
+
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows antidep.s -o antidep.obj
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows antidep2.s -o antidep2.obj
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows weak.s -o weak.obj
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows test.s -o test.obj
+
+Check that a regular weak alias overrides an anti-dependency symbol.
+
+RUN: lld-link -dll -noentry -out:out1.dll antidep.obj weak.obj test.obj
+RUN: llvm-readobj --hex-dump=.test out1.dll | FileCheck --check-prefix=CHECK2 %s
+
+RUN: lld-link -dll -noentry -out:out2.dll weak.obj antidep.obj test.obj
+RUN: llvm-readobj --hex-dump=.test out2.dll | FileCheck --check-prefix=CHECK2 %s
+
+RUN: lld-link -dll -noentry -out:out3.dll antidep.obj weak.obj test.obj -lld-allow-duplicate-weak
+RUN: llvm-readobj --hex-dump=.test out3.dll | FileCheck --check-prefix=CHECK2 %s
+
+RUN: lld-link -dll -noentry -out:out4.dll weak.obj antidep.obj test.obj -lld-allow-duplicate-weak
+RUN: llvm-readobj --hex-dump=.test out4.dll | FileCheck --check-prefix=CHECK2 %s
+
+When an anti-dependency symbol is duplicated, the first definition takes precedence over subsequent ones.
+
+RUN: lld-link -dll -noentry -out:out5.dll antidep.obj antidep2.obj test.obj
+RUN: llvm-readobj --hex-dump=.test out5.dll | FileCheck --check-prefix=CHECK1 %s
+
+RUN: lld-link -dll -noentry -out:out6.dll antidep2.obj antidep.obj test.obj
+RUN: llvm-readobj --hex-dump=.test out6.dll | FileCheck --check-prefix=CHECK2 %s
+
+CHECK1: 01000000
+CHECK2: 02000000
+
+#--- antidep.s
+     .weak_anti_dep sym
+.set sym,target1
+
+#--- antidep2.s
+     .weak_anti_dep sym
+.set sym,target2
+
+#--- weak.s
+     .weak sym
+.set sym,target2
+
+#--- test.s
+     .section .target,"dr"
+     .globl target1
+.set target1,1
+     .globl target2
+.set target2,2
+
+     .section .test,"dr"
+     .long sym



More information about the llvm-commits mailing list