[lld] 5b8da10 - [lld-macho] Add support for N_INDR symbols
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 15 05:35:52 PDT 2022
Author: Jez Ng
Date: 2022-09-15T08:35:24-04:00
New Revision: 5b8da10b87f7009c06215449e4a9c61dab91697a
URL: https://github.com/llvm/llvm-project/commit/5b8da10b87f7009c06215449e4a9c61dab91697a
DIFF: https://github.com/llvm/llvm-project/commit/5b8da10b87f7009c06215449e4a9c61dab91697a.diff
LOG: [lld-macho] Add support for N_INDR symbols
This is similar to the `-alias` CLI option, but it gives finer-grained
control in that it allows the aliased symbols to be treated as private
externs.
While working on this, I realized that our `-alias` handling did not
cover the cases where the aliased symbol is a common or dylib symbol,
nor the case where we have an undefined that gets treated specially and
converted to a defined later on. My N_INDR handling neglects this too
for now; I've added checks and TODO messages for these.
`N_INDR` symbols cropped up as part of our attempt to link swift-stdlib.
Reviewed By: #lld-macho, thakis, thevinster
Differential Revision: https://reviews.llvm.org/D133825
Added:
lld/test/MachO/alias-option.s
lld/test/MachO/alias-symbols.s
Modified:
lld/MachO/Driver.cpp
lld/MachO/InputFiles.cpp
lld/MachO/InputFiles.h
lld/MachO/SymbolTable.cpp
lld/MachO/SymbolTable.h
lld/MachO/Symbols.h
lld/docs/MachO/ld64-vs-lld.rst
Removed:
lld/test/MachO/aliases.s
################################################################################
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 533273e73bb9a..6bec1086f915d 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1200,13 +1200,40 @@ static void createAliases() {
for (const auto &pair : config->aliasedSymbols) {
if (const auto &sym = symtab->find(pair.first)) {
if (const auto &defined = dyn_cast<Defined>(sym)) {
- symtab->aliasDefined(defined, pair.second);
- continue;
+ symtab->aliasDefined(defined, pair.second, defined->getFile());
+ } else {
+ error("TODO: support aliasing to symbols of kind " +
+ Twine(sym->kind()));
}
+ } else {
+ warn("undefined base symbol '" + pair.first + "' for alias '" +
+ pair.second + "'\n");
}
+ }
- warn("undefined base symbol '" + pair.first + "' for alias '" +
- pair.second + "'\n");
+ for (const InputFile *file : inputFiles) {
+ if (auto *objFile = dyn_cast<ObjFile>(file)) {
+ for (const AliasSymbol *alias : objFile->aliases) {
+ if (const auto &aliased = symtab->find(alias->getAliasedName())) {
+ if (const auto &defined = dyn_cast<Defined>(aliased)) {
+ symtab->aliasDefined(defined, alias->getName(), alias->getFile(),
+ alias->privateExtern);
+ } else {
+ // Common, dylib, and undefined symbols are all valid alias
+ // referents (undefineds can become valid Defined symbols later on
+ // in the link.)
+ error("TODO: support aliasing to symbols of kind " +
+ Twine(aliased->kind()));
+ }
+ } else {
+ // This shouldn't happen since MC generates undefined symbols to
+ // represent the alias referents. Thus we fatal() instead of just
+ // warning here.
+ fatal("unable to find alias referent " + alias->getAliasedName() +
+ " for " + alias->getName());
+ }
+ }
+ }
}
}
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index f68985f99b47c..588b87a2927f9 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -872,7 +872,8 @@ static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
template <class NList>
macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
- StringRef name) {
+ const char *strtab) {
+ StringRef name = StringRef(strtab + sym.n_strx);
uint8_t type = sym.n_type & N_TYPE;
bool isPrivateExtern = sym.n_type & N_PEXT || forceHidden;
switch (type) {
@@ -884,9 +885,20 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
isPrivateExtern);
case N_ABS:
return createAbsolute(sym, this, name, forceHidden);
+ case N_INDR: {
+ // Not much point in making local aliases -- relocs in the current file can
+ // just refer to the actual symbol itself. ld64 ignores these symbols too.
+ if (!(sym.n_type & N_EXT))
+ return nullptr;
+ StringRef aliasedName = StringRef(strtab + sym.n_value);
+ // isPrivateExtern is the only symbol flag that has an impact on the final
+ // aliased symbol.
+ auto alias = make<AliasSymbol>(this, name, aliasedName, isPrivateExtern);
+ aliases.push_back(alias);
+ return alias;
+ }
case N_PBUD:
- case N_INDR:
- error("TODO: support symbols of type " + std::to_string(type));
+ error("TODO: support symbols of type N_PBUD");
return nullptr;
case N_SECT:
llvm_unreachable(
@@ -927,7 +939,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
} else if (isUndef(sym)) {
undefineds.push_back(i);
} else {
- symbols[i] = parseNonSectionSymbol(sym, StringRef(strtab + sym.n_strx));
+ symbols[i] = parseNonSectionSymbol(sym, strtab);
}
}
@@ -1026,11 +1038,8 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
// symbol resolution behavior. In addition, a set of interconnected symbols
// will all be resolved to the same file, instead of being resolved to
//
diff erent files.
- for (unsigned i : undefineds) {
- const NList &sym = nList[i];
- StringRef name = strtab + sym.n_strx;
- symbols[i] = parseNonSectionSymbol(sym, name);
- }
+ for (unsigned i : undefineds)
+ symbols[i] = parseNonSectionSymbol(nList[i], strtab);
}
OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 89172922a0a75..1b454f98932a8 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -44,6 +44,7 @@ struct PlatformInfo;
class ConcatInputSection;
class Symbol;
class Defined;
+class AliasSymbol;
struct Reloc;
enum class RefState : uint8_t;
@@ -176,6 +177,7 @@ class ObjFile final : public InputFile {
std::vector<CallGraphEntry> callGraph;
llvm::DenseMap<ConcatInputSection *, FDE> fdes;
std::vector<OptimizationHint> optimizationHints;
+ std::vector<AliasSymbol *> aliases;
private:
llvm::once_flag initDwarf;
@@ -186,7 +188,7 @@ class ObjFile final : public InputFile {
ArrayRef<typename LP::nlist> nList, const char *strtab,
bool subsectionsViaSymbols);
template <class NList>
- Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
+ Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab);
template <class SectionHeader>
void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
const SectionHeader &, Section &);
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index de4af3c8a9456..cc0f1d5bd917b 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -115,9 +115,11 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
return defined;
}
-Defined *SymbolTable::aliasDefined(Defined *src, StringRef target) {
- return addDefined(target, src->getFile(), src->isec, src->value, src->size,
- src->isWeakDef(), src->privateExtern, src->thumb,
+Defined *SymbolTable::aliasDefined(Defined *src, StringRef target,
+ InputFile *newFile, bool makePrivateExtern) {
+ bool isPrivateExtern = makePrivateExtern || src->privateExtern;
+ return addDefined(target, newFile, src->isec, src->value, src->size,
+ src->isWeakDef(), isPrivateExtern, src->thumb,
src->referencedDynamically, src->noDeadStrip,
src->weakDefCanBeHidden);
}
diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index a393f9c918087..d90245c7aaa0e 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -42,7 +42,8 @@ class SymbolTable {
bool isReferencedDynamically, bool noDeadStrip,
bool isWeakDefCanBeHidden);
- Defined *aliasDefined(Defined *src, StringRef target);
+ Defined *aliasDefined(Defined *src, StringRef target, InputFile *newFile,
+ bool makePrivateExtern = false);
Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef);
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 9d3b56a7ae269..6773170e028a6 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -39,6 +39,7 @@ class Symbol {
DylibKind,
LazyArchiveKind,
LazyObjectKind,
+ AliasKind,
};
virtual ~Symbol() {}
@@ -321,6 +322,26 @@ class LazyObject : public Symbol {
static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
};
+// Represents N_INDR symbols. Note that if we are given valid, linkable inputs,
+// then all AliasSymbol instances will be converted into one of the other Symbol
+// types after `createAliases()` runs.
+class AliasSymbol final : public Symbol {
+public:
+ AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName,
+ bool isPrivateExtern)
+ : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern),
+ aliasedName(aliasedName) {}
+
+ StringRef getAliasedName() const { return aliasedName; }
+
+ static bool classof(const Symbol *s) { return s->kind() == AliasKind; }
+
+ const bool privateExtern;
+
+private:
+ StringRef aliasedName;
+};
+
union SymbolUnion {
alignas(Defined) char a[sizeof(Defined)];
alignas(Undefined) char b[sizeof(Undefined)];
@@ -328,6 +349,7 @@ union SymbolUnion {
alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
alignas(LazyArchive) char e[sizeof(LazyArchive)];
alignas(LazyObject) char f[sizeof(LazyObject)];
+ alignas(AliasSymbol) char g[sizeof(AliasSymbol)];
};
template <typename T, typename... ArgT>
diff --git a/lld/docs/MachO/ld64-vs-lld.rst b/lld/docs/MachO/ld64-vs-lld.rst
index 4f3ae696bb962..f571e6a62b36f 100644
--- a/lld/docs/MachO/ld64-vs-lld.rst
+++ b/lld/docs/MachO/ld64-vs-lld.rst
@@ -37,3 +37,12 @@ archives.
symbol" error.
- LLD: Duplicate symbols, regardless of which archives they are from, will
raise errors.
+
+Aliases
+=======
+ld64 treats all aliases as strong extern definitions. Having two aliases of the
+same name, or an alias plus a regular extern symbol of the same name, both
+result in duplicate symbol errors. LLD does not check for duplicate aliases;
+instead we perform alias resolution first, and only then do we check for
+duplicate symbols. In particular, we will not report a duplicate symbol error if
+the aliased symbols turn out to be weak definitions, but ld64 will.
diff --git a/lld/test/MachO/aliases.s b/lld/test/MachO/alias-option.s
similarity index 100%
rename from lld/test/MachO/aliases.s
rename to lld/test/MachO/alias-option.s
diff --git a/lld/test/MachO/alias-symbols.s b/lld/test/MachO/alias-symbols.s
new file mode 100644
index 0000000000000..ef22c87a647f9
--- /dev/null
+++ b/lld/test/MachO/alias-symbols.s
@@ -0,0 +1,136 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/aliases.s -o %t/aliases.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/definitions.s -o %t/definitions.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-extern-alias-to-weak.s -o %t/weak-extern-alias-to-weak.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-extern-alias-to-strong.s -o %t/weak-extern-alias-to-strong.o
+
+# RUN: %lld -lSystem %t/aliases.o %t/definitions.o -o %t/out
+# RUN: llvm-objdump --macho --syms %t/out | FileCheck %s
+
+## local aliases should be dropped entirely. --implicit-check-not doesn't seem
+## to work well with -DAG matches, so we check for _local_alias' absence in a
+## separate step.
+# RUN: llvm-objdump --macho --syms %t/out | FileCheck /dev/null --implicit-check-not _local_alias
+
+# CHECK-DAG: [[#%.16x,STRONG:]] g F __TEXT,__text _strong
+# CHECK-DAG: [[#%.16x,WEAK_1:]] w F __TEXT,__text _weak_1
+# CHECK-DAG: [[#%.16x,PEXT:]] l F __TEXT,__text _pext
+# CHECK-DAG: [[#%.16x,DEAD:]] g F __TEXT,__text _dead
+# CHECK-DAG: [[#STRONG]] l F __TEXT,__text _pext_alias
+# CHECK-DAG: [[#PEXT]] l F __TEXT,__text _alias_to_pext
+# CHECK-DAG: [[#STRONG]] g F __TEXT,__text _extern_alias_to_strong
+# CHECK-DAG: [[#WEAK_1]] w F __TEXT,__text _weak_extern_alias_to_weak
+# CHECK-DAG: [[#DEAD]] g F __TEXT,__text _no_dead_strip_alias
+# CHECK-DAG: [[#STRONG]] g F __TEXT,__text _weak_extern_alias_to_strong
+
+# RUN: %lld -lSystem -dead_strip %t/aliases.o %t/definitions.o -o %t/dead-stripped
+# RUN: llvm-objdump --macho --syms %t/dead-stripped | FileCheck %s --check-prefix=STRIPPED
+
+# STRIPPED: SYMBOL TABLE:
+# STRIPPED-NEXT: g F __TEXT,__text _main
+# STRIPPED-NEXT: g F __TEXT,__text __mh_execute_header
+# STRIPPED-NEXT: *UND* dyld_stub_binder
+# STRIPPED-EMPTY:
+
+# RUN: not %lld -lSystem %t/aliases.o %t/definitions.o \
+# RUN: %t/weak-extern-alias-to-strong.o -o /dev/null 2>&1
+
+## Verify that we preserve the file names of the aliases, rather than using the
+## filename of the aliased symbols.
+# DUP: error: duplicate symbol: _weak_extern_alias_to_weak
+# DUP-NEXT: >>> defined in {{.*}}aliases.o
+# DUP-NEXT: >>> defined in {{.*}}weak-extern-alias-to-weak.o
+
+## The following cases are actually all dup symbol errors under ld64. Alias
+## symbols are treated like strong extern symbols by ld64 even if the symbol they alias
+## is actually weak. LLD OTOH does not check for dup symbols until after
+## resolving the aliases; this makes for a simpler implementation.
+## The following test cases are meant to elucidate what LLD's behavior is, but
+## we should feel free to change it in the future should it be helpful for the
+## implementation.
+
+# RUN: %lld -lSystem %t/aliases.o %t/definitions.o \
+# RUN: %t/weak-extern-alias-to-weak.o -o %t/alias-clash-1
+# RUN: llvm-objdump --macho --syms %t/alias-clash-1 | FileCheck %s --check-prefix WEAK-1
+
+# RUN: %lld -lSystem %t/weak-extern-alias-to-weak.o %t/aliases.o \
+# RUN: %t/definitions.o -o %t/alias-clash-2
+# RUN: llvm-objdump --macho --syms %t/alias-clash-2 | FileCheck %s --check-prefix WEAK-2
+
+# RUN: %lld -lSystem %t/aliases.o %t/definitions.o \
+# RUN: -alias _weak_2 _weak_extern_alias_to_weak -o %t/opt-vs-symbol
+# RUN: llvm-objdump --macho --syms %t/opt-vs-symbol | FileCheck %s --check-prefix WEAK-2
+
+# RUN: %lld -lSystem -alias _weak_2 _weak_extern_alias_to_weak %t/aliases.o \
+# RUN: %t/definitions.o -o %t/opt-vs-symbol
+# RUN: llvm-objdump --macho --syms %t/opt-vs-symbol | FileCheck %s --check-prefix WEAK-2
+
+# WEAK-1-DAG: [[#%.16x,WEAK_1:]] w F __TEXT,__text _weak_1
+# WEAK-1-DAG: [[#WEAK_1]] w F __TEXT,__text _weak_extern_alias_to_weak
+
+# WEAK-2-DAG: [[#%.16x,WEAK_2:]] w F __TEXT,__text _weak_2
+# WEAK-2-DAG: [[#WEAK_2]] w F __TEXT,__text _weak_extern_alias_to_weak
+
+#--- aliases.s
+.globl _extern_alias_to_strong, _weak_extern_alias_to_weak
+.weak_definition _weak_extern_alias_to_weak
+
+## Private extern aliases result in local symbols in the output (i.e. it is as
+## if the aliased symbol is also private extern.)
+.private_extern _pext_alias
+
+## This test case demonstrates that it doesn't matter whether the alias itself
+## is strong or weak. Rather, what matters is whether the aliased symbol is
+## strong or weak.
+.globl _weak_extern_alias_to_strong
+.weak_definition _weak_extern_alias_to_strong
+
+## no_dead_strip doesn't retain the aliased symbol if it is dead
+.globl _no_dead_strip_alias
+.no_dead_strip _no_dead_strip_alias
+
+.globl _alias_to_pext
+_alias_to_pext = _pext
+
+_extern_alias_to_strong = _strong
+_weak_extern_alias_to_weak = _weak_1
+_weak_extern_alias_to_strong = _strong
+
+_pext_alias = _strong
+_local_alias = _strong
+_no_dead_strip_alias = _dead
+
+.subsections_via_symbols
+
+#--- weak-extern-alias-to-weak.s
+.globl _weak_extern_alias_to_weak
+.weak_definition _weak_extern_alias_to_weak
+_weak_extern_alias_to_weak = _weak_2
+
+#--- weak-extern-alias-to-strong.s
+.globl _weak_extern_alias_to_strong
+.weak_definition _weak_extern_alias_to_strong
+_weak_extern_alias_to_strong = _strong
+
+#--- definitions.s
+.globl _strong, _weak_1, _weak_2, _dead
+.private_extern _pext
+.weak_definition _weak_1
+.weak_definition _weak_2
+
+_strong:
+ .space 1
+_weak_1:
+ .space 1
+_weak_2:
+ .space 1
+_dead:
+ .space 1
+_pext:
+ .space 1
+
+.globl _main
+_main:
+
+.subsections_via_symbols
More information about the llvm-commits
mailing list