[lld] 5b8da10 - [lld-macho] Add support for N_INDR symbols

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 15 05:35:52 PDT 2022


Author: Jez Ng
Date: 2022-09-15T08:35:24-04:00
New Revision: 5b8da10b87f7009c06215449e4a9c61dab91697a

URL: https://github.com/llvm/llvm-project/commit/5b8da10b87f7009c06215449e4a9c61dab91697a
DIFF: https://github.com/llvm/llvm-project/commit/5b8da10b87f7009c06215449e4a9c61dab91697a.diff

LOG: [lld-macho] Add support for N_INDR symbols

This is similar to the `-alias` CLI option, but it gives finer-grained
control in that it allows the aliased symbols to be treated as private
externs.

While working on this, I realized that our `-alias` handling did not
cover the cases where the aliased symbol is a common or dylib symbol,
nor the case where we have an undefined that gets treated specially and
converted to a defined later on. My N_INDR handling neglects this too
for now; I've added checks and TODO messages for these.

`N_INDR` symbols cropped up as part of our attempt to link swift-stdlib.

Reviewed By: #lld-macho, thakis, thevinster

Differential Revision: https://reviews.llvm.org/D133825

Added: 
    lld/test/MachO/alias-option.s
    lld/test/MachO/alias-symbols.s

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/SymbolTable.cpp
    lld/MachO/SymbolTable.h
    lld/MachO/Symbols.h
    lld/docs/MachO/ld64-vs-lld.rst

Removed: 
    lld/test/MachO/aliases.s


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 533273e73bb9a..6bec1086f915d 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1200,13 +1200,40 @@ static void createAliases() {
   for (const auto &pair : config->aliasedSymbols) {
     if (const auto &sym = symtab->find(pair.first)) {
       if (const auto &defined = dyn_cast<Defined>(sym)) {
-        symtab->aliasDefined(defined, pair.second);
-        continue;
+        symtab->aliasDefined(defined, pair.second, defined->getFile());
+      } else {
+        error("TODO: support aliasing to symbols of kind " +
+              Twine(sym->kind()));
       }
+    } else {
+      warn("undefined base symbol '" + pair.first + "' for alias '" +
+           pair.second + "'\n");
     }
+  }
 
-    warn("undefined base symbol '" + pair.first + "' for alias '" +
-         pair.second + "'\n");
+  for (const InputFile *file : inputFiles) {
+    if (auto *objFile = dyn_cast<ObjFile>(file)) {
+      for (const AliasSymbol *alias : objFile->aliases) {
+        if (const auto &aliased = symtab->find(alias->getAliasedName())) {
+          if (const auto &defined = dyn_cast<Defined>(aliased)) {
+            symtab->aliasDefined(defined, alias->getName(), alias->getFile(),
+                                 alias->privateExtern);
+          } else {
+            // Common, dylib, and undefined symbols are all valid alias
+            // referents (undefineds can become valid Defined symbols later on
+            // in the link.)
+            error("TODO: support aliasing to symbols of kind " +
+                  Twine(aliased->kind()));
+          }
+        } else {
+          // This shouldn't happen since MC generates undefined symbols to
+          // represent the alias referents. Thus we fatal() instead of just
+          // warning here.
+          fatal("unable to find alias referent " + alias->getAliasedName() +
+                " for " + alias->getName());
+        }
+      }
+    }
   }
 }
 

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index f68985f99b47c..588b87a2927f9 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -872,7 +872,8 @@ static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
 
 template <class NList>
 macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
-                                              StringRef name) {
+                                              const char *strtab) {
+  StringRef name = StringRef(strtab + sym.n_strx);
   uint8_t type = sym.n_type & N_TYPE;
   bool isPrivateExtern = sym.n_type & N_PEXT || forceHidden;
   switch (type) {
@@ -884,9 +885,20 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
                                    isPrivateExtern);
   case N_ABS:
     return createAbsolute(sym, this, name, forceHidden);
+  case N_INDR: {
+    // Not much point in making local aliases -- relocs in the current file can
+    // just refer to the actual symbol itself. ld64 ignores these symbols too.
+    if (!(sym.n_type & N_EXT))
+      return nullptr;
+    StringRef aliasedName = StringRef(strtab + sym.n_value);
+    // isPrivateExtern is the only symbol flag that has an impact on the final
+    // aliased symbol.
+    auto alias = make<AliasSymbol>(this, name, aliasedName, isPrivateExtern);
+    aliases.push_back(alias);
+    return alias;
+  }
   case N_PBUD:
-  case N_INDR:
-    error("TODO: support symbols of type " + std::to_string(type));
+    error("TODO: support symbols of type N_PBUD");
     return nullptr;
   case N_SECT:
     llvm_unreachable(
@@ -927,7 +939,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
     } else if (isUndef(sym)) {
       undefineds.push_back(i);
     } else {
-      symbols[i] = parseNonSectionSymbol(sym, StringRef(strtab + sym.n_strx));
+      symbols[i] = parseNonSectionSymbol(sym, strtab);
     }
   }
 
@@ -1026,11 +1038,8 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
   // symbol resolution behavior. In addition, a set of interconnected symbols
   // will all be resolved to the same file, instead of being resolved to
   // 
diff erent files.
-  for (unsigned i : undefineds) {
-    const NList &sym = nList[i];
-    StringRef name = strtab + sym.n_strx;
-    symbols[i] = parseNonSectionSymbol(sym, name);
-  }
+  for (unsigned i : undefineds)
+    symbols[i] = parseNonSectionSymbol(nList[i], strtab);
 }
 
 OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 89172922a0a75..1b454f98932a8 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -44,6 +44,7 @@ struct PlatformInfo;
 class ConcatInputSection;
 class Symbol;
 class Defined;
+class AliasSymbol;
 struct Reloc;
 enum class RefState : uint8_t;
 
@@ -176,6 +177,7 @@ class ObjFile final : public InputFile {
   std::vector<CallGraphEntry> callGraph;
   llvm::DenseMap<ConcatInputSection *, FDE> fdes;
   std::vector<OptimizationHint> optimizationHints;
+  std::vector<AliasSymbol *> aliases;
 
 private:
   llvm::once_flag initDwarf;
@@ -186,7 +188,7 @@ class ObjFile final : public InputFile {
                     ArrayRef<typename LP::nlist> nList, const char *strtab,
                     bool subsectionsViaSymbols);
   template <class NList>
-  Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
+  Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab);
   template <class SectionHeader>
   void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
                         const SectionHeader &, Section &);

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index de4af3c8a9456..cc0f1d5bd917b 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -115,9 +115,11 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
   return defined;
 }
 
-Defined *SymbolTable::aliasDefined(Defined *src, StringRef target) {
-  return addDefined(target, src->getFile(), src->isec, src->value, src->size,
-                    src->isWeakDef(), src->privateExtern, src->thumb,
+Defined *SymbolTable::aliasDefined(Defined *src, StringRef target,
+                                   InputFile *newFile, bool makePrivateExtern) {
+  bool isPrivateExtern = makePrivateExtern || src->privateExtern;
+  return addDefined(target, newFile, src->isec, src->value, src->size,
+                    src->isWeakDef(), isPrivateExtern, src->thumb,
                     src->referencedDynamically, src->noDeadStrip,
                     src->weakDefCanBeHidden);
 }

diff  --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index a393f9c918087..d90245c7aaa0e 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -42,7 +42,8 @@ class SymbolTable {
                       bool isReferencedDynamically, bool noDeadStrip,
                       bool isWeakDefCanBeHidden);
 
-  Defined *aliasDefined(Defined *src, StringRef target);
+  Defined *aliasDefined(Defined *src, StringRef target, InputFile *newFile,
+                        bool makePrivateExtern = false);
 
   Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef);
 

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 9d3b56a7ae269..6773170e028a6 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -39,6 +39,7 @@ class Symbol {
     DylibKind,
     LazyArchiveKind,
     LazyObjectKind,
+    AliasKind,
   };
 
   virtual ~Symbol() {}
@@ -321,6 +322,26 @@ class LazyObject : public Symbol {
   static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
 };
 
+// Represents N_INDR symbols. Note that if we are given valid, linkable inputs,
+// then all AliasSymbol instances will be converted into one of the other Symbol
+// types after `createAliases()` runs.
+class AliasSymbol final : public Symbol {
+public:
+  AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName,
+              bool isPrivateExtern)
+      : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern),
+        aliasedName(aliasedName) {}
+
+  StringRef getAliasedName() const { return aliasedName; }
+
+  static bool classof(const Symbol *s) { return s->kind() == AliasKind; }
+
+  const bool privateExtern;
+
+private:
+  StringRef aliasedName;
+};
+
 union SymbolUnion {
   alignas(Defined) char a[sizeof(Defined)];
   alignas(Undefined) char b[sizeof(Undefined)];
@@ -328,6 +349,7 @@ union SymbolUnion {
   alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
   alignas(LazyArchive) char e[sizeof(LazyArchive)];
   alignas(LazyObject) char f[sizeof(LazyObject)];
+  alignas(AliasSymbol) char g[sizeof(AliasSymbol)];
 };
 
 template <typename T, typename... ArgT>

diff  --git a/lld/docs/MachO/ld64-vs-lld.rst b/lld/docs/MachO/ld64-vs-lld.rst
index 4f3ae696bb962..f571e6a62b36f 100644
--- a/lld/docs/MachO/ld64-vs-lld.rst
+++ b/lld/docs/MachO/ld64-vs-lld.rst
@@ -37,3 +37,12 @@ archives.
       symbol" error.
 - LLD: Duplicate symbols, regardless of which archives they are from, will
   raise errors.
+
+Aliases
+=======
+ld64 treats all aliases as strong extern definitions. Having two aliases of the
+same name, or an alias plus a regular extern symbol of the same name, both
+result in duplicate symbol errors. LLD does not check for duplicate aliases;
+instead we perform alias resolution first, and only then do we check for
+duplicate symbols. In particular, we will not report a duplicate symbol error if
+the aliased symbols turn out to be weak definitions, but ld64 will.

diff  --git a/lld/test/MachO/aliases.s b/lld/test/MachO/alias-option.s
similarity index 100%
rename from lld/test/MachO/aliases.s
rename to lld/test/MachO/alias-option.s

diff  --git a/lld/test/MachO/alias-symbols.s b/lld/test/MachO/alias-symbols.s
new file mode 100644
index 0000000000000..ef22c87a647f9
--- /dev/null
+++ b/lld/test/MachO/alias-symbols.s
@@ -0,0 +1,136 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/aliases.s -o %t/aliases.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/definitions.s -o %t/definitions.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-extern-alias-to-weak.s -o %t/weak-extern-alias-to-weak.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-extern-alias-to-strong.s -o %t/weak-extern-alias-to-strong.o
+
+# RUN: %lld -lSystem %t/aliases.o %t/definitions.o -o %t/out
+# RUN: llvm-objdump --macho --syms %t/out | FileCheck %s
+
+## local aliases should be dropped entirely. --implicit-check-not doesn't seem
+## to work well with -DAG matches, so we check for _local_alias' absence in a
+## separate step.
+# RUN: llvm-objdump --macho --syms %t/out | FileCheck /dev/null --implicit-check-not _local_alias
+
+# CHECK-DAG: [[#%.16x,STRONG:]] g     F __TEXT,__text _strong
+# CHECK-DAG: [[#%.16x,WEAK_1:]]  w    F __TEXT,__text _weak_1
+# CHECK-DAG: [[#%.16x,PEXT:]]   l     F __TEXT,__text _pext
+# CHECK-DAG: [[#%.16x,DEAD:]]   g     F __TEXT,__text _dead
+# CHECK-DAG: [[#STRONG]]        l     F __TEXT,__text _pext_alias
+# CHECK-DAG: [[#PEXT]]          l     F __TEXT,__text _alias_to_pext
+# CHECK-DAG: [[#STRONG]]        g     F __TEXT,__text _extern_alias_to_strong
+# CHECK-DAG: [[#WEAK_1]]         w    F __TEXT,__text _weak_extern_alias_to_weak
+# CHECK-DAG: [[#DEAD]]          g     F __TEXT,__text _no_dead_strip_alias
+# CHECK-DAG: [[#STRONG]]        g     F __TEXT,__text _weak_extern_alias_to_strong
+
+# RUN: %lld -lSystem -dead_strip %t/aliases.o %t/definitions.o -o %t/dead-stripped
+# RUN: llvm-objdump --macho --syms %t/dead-stripped | FileCheck %s --check-prefix=STRIPPED
+
+# STRIPPED:       SYMBOL TABLE:
+# STRIPPED-NEXT:  g     F __TEXT,__text _main
+# STRIPPED-NEXT:  g     F __TEXT,__text __mh_execute_header
+# STRIPPED-NEXT:          *UND* dyld_stub_binder
+# STRIPPED-EMPTY:
+
+# RUN: not %lld -lSystem %t/aliases.o %t/definitions.o \
+# RUN:   %t/weak-extern-alias-to-strong.o -o /dev/null 2>&1
+
+## Verify that we preserve the file names of the aliases, rather than using the
+## filename of the aliased symbols.
+# DUP:      error: duplicate symbol: _weak_extern_alias_to_weak
+# DUP-NEXT: >>> defined in {{.*}}aliases.o
+# DUP-NEXT: >>> defined in {{.*}}weak-extern-alias-to-weak.o
+
+## The following cases are actually all dup symbol errors under ld64. Alias
+## symbols are treated like strong extern symbols by ld64 even if the symbol they alias
+## is actually weak. LLD OTOH does not check for dup symbols until after
+## resolving the aliases; this makes for a simpler implementation.
+## The following test cases are meant to elucidate what LLD's behavior is, but
+## we should feel free to change it in the future should it be helpful for the
+## implementation.
+
+# RUN: %lld -lSystem %t/aliases.o %t/definitions.o \
+# RUN:   %t/weak-extern-alias-to-weak.o -o %t/alias-clash-1
+# RUN: llvm-objdump --macho --syms %t/alias-clash-1 | FileCheck %s --check-prefix WEAK-1
+
+# RUN: %lld -lSystem %t/weak-extern-alias-to-weak.o %t/aliases.o \
+# RUN:   %t/definitions.o -o %t/alias-clash-2
+# RUN: llvm-objdump --macho --syms %t/alias-clash-2 | FileCheck %s --check-prefix WEAK-2
+
+# RUN: %lld -lSystem %t/aliases.o %t/definitions.o \
+# RUN:   -alias _weak_2 _weak_extern_alias_to_weak -o %t/opt-vs-symbol
+# RUN: llvm-objdump --macho --syms %t/opt-vs-symbol | FileCheck %s --check-prefix WEAK-2
+
+# RUN: %lld -lSystem -alias _weak_2 _weak_extern_alias_to_weak %t/aliases.o \
+# RUN:   %t/definitions.o -o %t/opt-vs-symbol
+# RUN: llvm-objdump --macho --syms %t/opt-vs-symbol | FileCheck %s --check-prefix WEAK-2
+
+# WEAK-1-DAG: [[#%.16x,WEAK_1:]]  w    F __TEXT,__text _weak_1
+# WEAK-1-DAG: [[#WEAK_1]]         w    F __TEXT,__text _weak_extern_alias_to_weak
+
+# WEAK-2-DAG: [[#%.16x,WEAK_2:]]  w    F __TEXT,__text _weak_2
+# WEAK-2-DAG: [[#WEAK_2]]         w    F __TEXT,__text _weak_extern_alias_to_weak
+
+#--- aliases.s
+.globl _extern_alias_to_strong, _weak_extern_alias_to_weak
+.weak_definition _weak_extern_alias_to_weak
+
+## Private extern aliases result in local symbols in the output (i.e. it is as
+## if the aliased symbol is also private extern.)
+.private_extern _pext_alias
+
+## This test case demonstrates that it doesn't matter whether the alias itself
+## is strong or weak. Rather, what matters is whether the aliased symbol is
+## strong or weak.
+.globl _weak_extern_alias_to_strong
+.weak_definition _weak_extern_alias_to_strong
+
+## no_dead_strip doesn't retain the aliased symbol if it is dead
+.globl _no_dead_strip_alias
+.no_dead_strip _no_dead_strip_alias
+
+.globl _alias_to_pext
+_alias_to_pext = _pext
+
+_extern_alias_to_strong = _strong
+_weak_extern_alias_to_weak = _weak_1
+_weak_extern_alias_to_strong = _strong
+
+_pext_alias = _strong
+_local_alias = _strong
+_no_dead_strip_alias = _dead
+
+.subsections_via_symbols
+
+#--- weak-extern-alias-to-weak.s
+.globl _weak_extern_alias_to_weak
+.weak_definition _weak_extern_alias_to_weak
+_weak_extern_alias_to_weak = _weak_2
+
+#--- weak-extern-alias-to-strong.s
+.globl _weak_extern_alias_to_strong
+.weak_definition _weak_extern_alias_to_strong
+_weak_extern_alias_to_strong = _strong
+
+#--- definitions.s
+.globl _strong, _weak_1, _weak_2, _dead
+.private_extern _pext
+.weak_definition _weak_1
+.weak_definition _weak_2
+
+_strong:
+  .space 1
+_weak_1:
+  .space 1
+_weak_2:
+  .space 1
+_dead:
+  .space 1
+_pext:
+  .space 1
+
+.globl _main
+_main:
+
+.subsections_via_symbols


        


More information about the llvm-commits mailing list