[lld] fbb4594 - [lld/mac] Resolve defined symbols before undefined symbols

Nico Weber via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 19 13:41:58 PDT 2021


Author: Nico Weber
Date: 2021-07-19T16:37:41-04:00
New Revision: fbb45947b2a77c6295c110c7fbb648adac14d2c9

URL: https://github.com/llvm/llvm-project/commit/fbb45947b2a77c6295c110c7fbb648adac14d2c9
DIFF: https://github.com/llvm/llvm-project/commit/fbb45947b2a77c6295c110c7fbb648adac14d2c9.diff

LOG: [lld/mac] Resolve defined symbols before undefined symbols

Ports https://reviews.llvm.org/D95985 to the MachO port.
Happens to fix PR51135; see that bug for details.
Also makes lld's behavior match ld64 for the included test case.

Differential Revision: https://reviews.llvm.org/D106293

Added: 
    lld/test/MachO/weak-definition-in-main-file.s

Modified: 
    lld/ELF/InputFiles.cpp
    lld/MachO/InputFiles.cpp

Removed: 
    


################################################################################
diff  --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 8487210eb6e18..271926a6184a2 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1131,7 +1131,7 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
   }
 
   // Symbol resolution of non-local symbols.
-  SmallVector<unsigned, 32> unds;
+  SmallVector<unsigned, 32> undefineds;
   for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
     const Elf_Sym &eSym = eSyms[i];
     uint8_t binding = eSym.getBinding();
@@ -1148,7 +1148,7 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
 
     // Handle global undefined symbols.
     if (eSym.st_shndx == SHN_UNDEF) {
-      unds.push_back(i);
+      undefineds.push_back(i);
       continue;
     }
 
@@ -1202,7 +1202,7 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
   // does not change the symbol resolution behavior. In addition, a set of
   // interconnected symbols will all be resolved to the same file, instead of
   // being resolved to 
diff erent files.
-  for (unsigned i : unds) {
+  for (unsigned i : undefineds) {
     const Elf_Sym &eSym = eSyms[i];
     StringRefZ name = this->stringTable.data() + eSym.st_name;
     this->symbols[i]->resolve(Undefined{this, name, eSym.getBinding(),

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 8f5df2529002a..e1fc35d2f534c 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -585,6 +585,11 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
   }
 }
 
+template <class NList>
+static bool isUndef(const NList &sym) {
+  return (sym.n_type & N_TYPE) == N_UNDF && sym.n_value == 0;
+}
+
 template <class LP>
 void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
                            ArrayRef<typename LP::nlist> nList,
@@ -594,6 +599,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
   // Groups indices of the symbols by the sections that contain them.
   std::vector<std::vector<uint32_t>> symbolsBySection(subsections.size());
   symbols.resize(nList.size());
+  SmallVector<unsigned, 32> undefineds;
   for (uint32_t i = 0; i < nList.size(); ++i) {
     const NList &sym = nList[i];
 
@@ -609,6 +615,8 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
       if (subsecMap.empty())
         continue;
       symbolsBySection[sym.n_sect - 1].push_back(i);
+    } else if (isUndef(sym)) {
+      undefineds.push_back(i);
     } else {
       symbols[i] = parseNonSectionSymbol(sym, name);
     }
@@ -703,6 +711,18 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
       subsecEntry = subsecMap.back();
     }
   }
+
+  // Undefined symbols can trigger recursive fetch from Archives due to
+  // LazySymbols. Process defined symbols first so that the relative order
+  // between a defined symbol and an undefined symbol does not change the
+  // symbol resolution behavior. In addition, a set of interconnected symbols
+  // will all be resolved to the same file, instead of being resolved to
+  // 
diff erent files.
+  for (unsigned i : undefineds) {
+    const NList &sym = nList[i];
+    StringRef name = strtab + sym.n_strx;
+    symbols[i] = parseNonSectionSymbol(sym, name);
+  }
 }
 
 OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,

diff  --git a/lld/test/MachO/weak-definition-in-main-file.s b/lld/test/MachO/weak-definition-in-main-file.s
new file mode 100644
index 0000000000000..dd8ab86609194
--- /dev/null
+++ b/lld/test/MachO/weak-definition-in-main-file.s
@@ -0,0 +1,44 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+
+## Test that a weak symbol in a direct .o file wins over
+## a weak symbol in a .a file.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weakfoo.s -o %t/weakfoo.o
+
+# RUN: llvm-ar --format=darwin rcs %t/weakfoo.a %t/weakfoo.o
+
+# PREFER-DIRECT-OBJECT-NOT: O __TEXT,weak __foo
+
+# RUN: %lld -lSystem -o %t/out %t/weakfoo.a %t/test.o
+# RUN: llvm-objdump --syms %t/out | FileCheck %s --check-prefix=PREFER-DIRECT-OBJECT
+
+#--- weakfoo.s
+.globl __baz
+__baz:
+  ret
+
+.section __TEXT,weak
+.weak_definition __foo
+.globl __foo
+__foo:
+  ret
+
+.subsections_via_symbols
+
+#--- test.s
+.globl __foo
+.weak_definition __foo
+__foo:
+  ret
+
+.globl _main
+_main:
+  # This pulls in weakfoo.a due to the __baz undef, but __foo should
+  # still be resolved against the weak symbol in this file.
+  callq __baz
+  callq __foo
+  ret
+
+.subsections_via_symbols


        


More information about the llvm-commits mailing list