[lld] 62a3f0c - [lld-macho] Support absolute symbols

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 25 11:28:54 PDT 2020


Author: Jez Ng
Date: 2020-09-25T11:28:35-07:00
New Revision: 62a3f0c9844bb89a48173440145b26212be60f83

URL: https://github.com/llvm/llvm-project/commit/62a3f0c9844bb89a48173440145b26212be60f83
DIFF: https://github.com/llvm/llvm-project/commit/62a3f0c9844bb89a48173440145b26212be60f83.diff

LOG: [lld-macho] Support absolute symbols

They operate like Defined symbols but with no associated InputSection.

Note that `ld64` seems to treat the weak definition flag like a no-op for
absolute symbols, so I have replicated that behavior.

Reviewed By: #lld-macho, smeenai

Differential Revision: https://reviews.llvm.org/D87909

Added: 
    lld/test/MachO/abs-symbols.s
    lld/test/MachO/invalid/abs-duplicate.s

Modified: 
    lld/MachO/ExportTrie.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/SymbolTable.cpp
    lld/MachO/Symbols.cpp
    lld/MachO/Symbols.h
    lld/MachO/SyntheticSections.cpp
    lld/test/MachO/invalid/dso-handle-duplicate.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/ExportTrie.cpp b/lld/MachO/ExportTrie.cpp
index 0093907ea959..bd0c0004309c 100644
--- a/lld/MachO/ExportTrie.cpp
+++ b/lld/MachO/ExportTrie.cpp
@@ -62,11 +62,18 @@ struct ExportInfo {
   uint8_t flags = 0;
   ExportInfo(const Symbol &sym, uint64_t imageBase)
       : address(sym.getVA() - imageBase) {
+    // Set the symbol flags.
     if (sym.isWeakDef())
       flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
-    if (sym.isTlv())
-      flags |= EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
     // TODO: Add proper support for re-exports & stub-and-resolver flags.
+
+    // Set the symbol kind.
+    if (sym.isTlv()) {
+      flags |= EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
+    } else if (auto *defined = dyn_cast<Defined>(&sym)) {
+      if (defined->isAbsolute())
+        flags |= EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE;
+    }
   }
 };
 

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index ad2d6a25849a..35211ea25e73 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -222,6 +222,50 @@ void InputFile::parseRelocations(const section_64 &sec,
   }
 }
 
+static macho::Symbol *createDefined(const structs::nlist_64 &sym,
+                                    StringRef name, InputSection *isec,
+                                    uint32_t value) {
+  if (sym.n_type & N_EXT)
+    // Global defined symbol
+    return symtab->addDefined(name, isec, value, sym.n_desc & N_WEAK_DEF);
+  // Local defined symbol
+  return make<Defined>(name, isec, value, sym.n_desc & N_WEAK_DEF,
+                       /*isExternal=*/false);
+}
+
+// Absolute symbols are defined symbols that do not have an associated
+// InputSection. As in ld64, their weak-definition flag is ignored.
+static macho::Symbol *createAbsolute(const structs::nlist_64 &sym,
+                                     StringRef name) {
+  if (sym.n_type & N_EXT)
+    return symtab->addDefined(name, nullptr, sym.n_value, /*isWeakDef=*/false);
+  return make<Defined>(name, nullptr, sym.n_value, /*isWeakDef=*/false,
+                       /*isExternal=*/false);
+}
+
+macho::Symbol *InputFile::parseNonSectionSymbol(const structs::nlist_64 &sym,
+                                                StringRef name) {
+  uint8_t type = sym.n_type & N_TYPE;
+  switch (type) {
+  case N_UNDF:
+    return sym.n_value == 0
+               ? symtab->addUndefined(name)
+               : symtab->addCommon(name, this, sym.n_value,
+                                   1 << GET_COMM_ALIGN(sym.n_desc));
+  case N_ABS:
+    return createAbsolute(sym, name);
+  case N_PBUD:
+  case N_INDR:
+    error("TODO: support symbols of type " + std::to_string(type));
+    return nullptr;
+  case N_SECT:
+    llvm_unreachable(
+        "N_SECT symbols should not be passed to parseNonSectionSymbol");
+  default:
+    llvm_unreachable("invalid symbol type");
+  }
+}
+
 void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
                              const char *strtab, bool subsectionsViaSymbols) {
   // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols
@@ -229,26 +273,12 @@ void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
   symbols.resize(nList.size());
   std::vector<size_t> altEntrySymIdxs;
 
-  auto createDefined = [&](const structs::nlist_64 &sym, InputSection *isec,
-                           uint32_t value) -> Symbol * {
-    StringRef name = strtab + sym.n_strx;
-    if (sym.n_type & N_EXT)
-      // Global defined symbol
-      return symtab->addDefined(name, isec, value, sym.n_desc & N_WEAK_DEF);
-    // Local defined symbol
-    return make<Defined>(name, isec, value, sym.n_desc & N_WEAK_DEF,
-                         /*isExternal=*/false);
-  };
-
   for (size_t i = 0, n = nList.size(); i < n; ++i) {
     const structs::nlist_64 &sym = nList[i];
+    StringRef name = strtab + sym.n_strx;
 
-    if ((sym.n_type & N_TYPE) == N_UNDF) {
-      StringRef name = strtab + sym.n_strx;
-      symbols[i] = sym.n_value == 0
-                       ? symtab->addUndefined(name)
-                       : symtab->addCommon(name, this, sym.n_value,
-                                           1 << GET_COMM_ALIGN(sym.n_desc));
+    if ((sym.n_type & N_TYPE) != N_SECT) {
+      symbols[i] = parseNonSectionSymbol(sym, name);
       continue;
     }
 
@@ -260,7 +290,7 @@ void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
     // use the same subsection. Otherwise, we must split the sections along
     // symbol boundaries.
     if (!subsectionsViaSymbols) {
-      symbols[i] = createDefined(sym, subsecMap[0], offset);
+      symbols[i] = createDefined(sym, name, subsecMap[0], offset);
       continue;
     }
 
@@ -282,7 +312,7 @@ void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
     if (firstSize == 0) {
       // Alias of an existing symbol, or the first symbol in the section. These
       // are handled by reusing the existing section.
-      symbols[i] = createDefined(sym, firstIsec, 0);
+      symbols[i] = createDefined(sym, name, firstIsec, 0);
       continue;
     }
 
@@ -298,15 +328,16 @@ void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
 
     subsecMap[offset] = secondIsec;
     // By construction, the symbol will be at offset zero in the new section.
-    symbols[i] = createDefined(sym, secondIsec, 0);
+    symbols[i] = createDefined(sym, name, secondIsec, 0);
   }
 
   for (size_t idx : altEntrySymIdxs) {
     const structs::nlist_64 &sym = nList[idx];
+    StringRef name = strtab + sym.n_strx;
     SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
     uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr;
     InputSection *subsec = findContainingSubsection(subsecMap, &off);
-    symbols[idx] = createDefined(sym, subsec, off);
+    symbols[idx] = createDefined(sym, name, subsec, off);
   }
 }
 

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index bb83c2091178..59b0e41d5de3 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -65,6 +65,8 @@ class InputFile {
   void parseSymbols(ArrayRef<lld::structs::nlist_64> nList, const char *strtab,
                     bool subsectionsViaSymbols);
 
+  Symbol *parseNonSectionSymbol(const structs::nlist_64 &sym, StringRef name);
+
   void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &);
 
 private:

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 0aa8828d1718..6a3ee9230f41 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -131,9 +131,11 @@ Symbol *SymbolTable::addDSOHandle(const MachHeaderSection *header) {
   bool wasInserted;
   std::tie(s, wasInserted) = insert(DSOHandle::name);
   if (!wasInserted) {
+    // FIXME: Make every symbol (including absolute symbols) contain a
+    // reference to its originating file, then add that file name to this
+    // error message.
     if (auto *defined = dyn_cast<Defined>(s))
-      error("found defined symbol from " + defined->isec->file->getName() +
-            " with illegal name " + DSOHandle::name);
+      error("found defined symbol with illegal name " + DSOHandle::name);
   }
   replaceSymbol<DSOHandle>(s, header);
   return s;

diff  --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index af5d9d25029f..75c699781a61 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -14,6 +14,21 @@ using namespace llvm;
 using namespace lld;
 using namespace lld::macho;
 
+uint64_t Defined::getVA() const {
+  if (isAbsolute())
+    return value;
+  return isec->getVA() + value;
+}
+
+uint64_t Defined::getFileOffset() const {
+  if (isAbsolute()) {
+    error("absolute symbol " + toString(*this) +
+          " does not have a file offset");
+    return 0;
+  }
+  return isec->getFileOffset() + value;
+}
+
 void LazySymbol::fetchArchiveMember() { file->fetch(sym); }
 
 // Returns a symbol for an error message.

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 14c1ee813420..f0d77d5ce0fe 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -89,18 +89,17 @@ class Defined : public Symbol {
         overridesWeakDef(false), weakDef(isWeakDef), external(isExternal) {}
 
   bool isWeakDef() const override { return weakDef; }
-
-  bool isTlv() const override { return isThreadLocalVariables(isec->flags); }
+  bool isTlv() const override {
+    return !isAbsolute() && isThreadLocalVariables(isec->flags);
+  }
 
   bool isExternal() const { return external; }
+  bool isAbsolute() const { return isec == nullptr; }
 
-  static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
-
-  uint64_t getVA() const override { return isec->getVA() + value; }
+  uint64_t getVA() const override;
+  uint64_t getFileOffset() const override;
 
-  uint64_t getFileOffset() const override {
-    return isec->getFileOffset() + value;
-  }
+  static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
 
   InputSection *isec;
   uint32_t value;

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index e40b832195b7..5d603d6f90d3 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -594,11 +594,17 @@ void SymtabSection::writeTo(uint8_t *buf) const {
     // TODO support other symbol types
     // TODO populate n_desc with more flags
     if (auto *defined = dyn_cast<Defined>(entry.sym)) {
-      nList->n_type = MachO::N_EXT | MachO::N_SECT;
-      nList->n_sect = defined->isec->parent->index;
+      if (defined->isAbsolute()) {
+        nList->n_type = MachO::N_EXT | MachO::N_ABS;
+        nList->n_sect = MachO::NO_SECT;
+        nList->n_value = defined->value;
+      } else {
+        nList->n_type = MachO::N_EXT | MachO::N_SECT;
+        nList->n_sect = defined->isec->parent->index;
+        // For the N_SECT symbol type, n_value is the address of the symbol
+        nList->n_value = defined->value + defined->isec->getVA();
+      }
       nList->n_desc |= defined->isWeakDef() ? MachO::N_WEAK_DEF : 0;
-      // For the N_SECT symbol type, n_value is the address of the symbol
-      nList->n_value = defined->value + defined->isec->getVA();
     }
     ++nList;
   }

diff  --git a/lld/test/MachO/abs-symbols.s b/lld/test/MachO/abs-symbols.s
new file mode 100644
index 000000000000..25f6c87ca41e
--- /dev/null
+++ b/lld/test/MachO/abs-symbols.s
@@ -0,0 +1,24 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: lld -flavor darwinnew -syslibroot %S/Inputs/MacOSX.sdk -lSystem %t.o -o %t
+# RUN: llvm-objdump --macho --syms --exports-trie %t | FileCheck %s
+
+# CHECK-LABEL: SYMBOL TABLE:
+# CHECK-DAG:   000000000000dead g       *ABS* _foo
+# CHECK-DAG:   000000000000beef g       *ABS* _weakfoo
+
+# CHECK-LABEL: Exports trie:
+# CHECK-DAG:   0x0000DEAD  _foo [absolute]
+# CHECK-DAG:   0x0000BEEF  _weakfoo [absolute]
+
+.globl _foo, _weakfoo, _main
+.weak_definition _weakfoo
+_foo = 0xdead
+_weakfoo = 0xbeef
+
+.text
+_main:
+  ret
+
+## TODO: once we support emitting local symbols in the symtab, test local
+## absolute symbols too

diff  --git a/lld/test/MachO/invalid/abs-duplicate.s b/lld/test/MachO/invalid/abs-duplicate.s
new file mode 100644
index 000000000000..e46c78351385
--- /dev/null
+++ b/lld/test/MachO/invalid/abs-duplicate.s
@@ -0,0 +1,23 @@
+# REQUIRES: x86
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weakfoo.s -o %t/weakfoo.o
+# RUN: not lld -flavor darwinnew -syslibroot %S/../Inputs/MacOSX.sdk -lSystem %t/test.o %t/weakfoo.o -o %t/test 2>&1 | FileCheck %s
+
+# CHECK: lld: error: duplicate symbol: _weakfoo
+
+#--- weakfoo.s
+.globl _weakfoo
+## The weak attribute is ignored for absolute symbols, so we will have a
+## duplicate symbol error for _weakfoo.
+.weak_definition _weakfoo
+_weakfoo = 0x1234
+
+#--- test.s
+.globl _main, _weakfoo
+.weak_definition _weakfoo
+_weakfoo = 0x5678
+
+.text
+_main:
+  ret

diff  --git a/lld/test/MachO/invalid/dso-handle-duplicate.s b/lld/test/MachO/invalid/dso-handle-duplicate.s
index 5991c6faff88..4390da62c33e 100644
--- a/lld/test/MachO/invalid/dso-handle-duplicate.s
+++ b/lld/test/MachO/invalid/dso-handle-duplicate.s
@@ -7,8 +7,8 @@
 ## far-out edge case that should be safe to ignore.
 
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
-# RUN: not lld -flavor darwinnew -dylib %t.o -o %t.dylib 2>&1 | FileCheck %s -DFILE=%t.o
-# CHECK: error: found defined symbol from [[FILE]] with illegal name ___dso_handle
+# RUN: not lld -flavor darwinnew -dylib %t.o -o %t.dylib 2>&1 | FileCheck %s
+# CHECK: error: found defined symbol with illegal name ___dso_handle
 
 .globl _main, ___dso_handle
 .text


        


More information about the llvm-commits mailing list