[llvm-branch-commits] [lld] 51629ab - [lld-macho] Emit local symbols in symtab; record metadata in LC_DYSYMTAB

Jez Ng via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Dec 1 15:10:12 PST 2020


Author: Jez Ng
Date: 2020-12-01T15:05:20-08:00
New Revision: 51629abce0e2f9d1376eb0b5070532a2bbec6766

URL: https://github.com/llvm/llvm-project/commit/51629abce0e2f9d1376eb0b5070532a2bbec6766
DIFF: https://github.com/llvm/llvm-project/commit/51629abce0e2f9d1376eb0b5070532a2bbec6766.diff

LOG: [lld-macho] Emit local symbols in symtab; record metadata in LC_DYSYMTAB

Symbols of the same type must be laid out contiguously: following ld64's
lead, we choose to emit all local symbols first, then external symbols,
and finally undefined symbols. For each symbol type, the LC_DYSYMTAB
load command will record the range (start index and total number) of
those symbols in the symbol table.

This work was motivated by the fact that LLDB won't search for debug
info if LC_DYSYMTAB says there are no local symbols (since STABS symbols
are all local symbols). With this change, LLDB is now able to display
the source lines at a given breakpoint when debugging our binaries.

Some tests had to be updated due to local symbol names now appearing in
`llvm-objdump`'s output.

Reviewed By: #lld-macho, smeenai, clayborg

Differential Revision: https://reviews.llvm.org/D89285

Added: 
    

Modified: 
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/MachO/Writer.cpp
    lld/test/MachO/stabs.s
    lld/test/MachO/subsections-symbol-relocs.s
    lld/test/MachO/symtab.s
    lld/test/MachO/tlv.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index feaa4b5d3e22..56a095c4a7e4 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -19,6 +19,7 @@
 
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/LEB128.h"
@@ -630,14 +631,35 @@ void SymtabSection::emitFunStabs(Defined *defined) {
 }
 
 void SymtabSection::finalizeContents() {
-  InputFile *lastFile = nullptr;
+  // Local symbols aren't in the SymbolTable, so we walk the list of object
+  // files to gather them.
+  for (InputFile *file : inputFiles) {
+    if (auto *objFile = dyn_cast<ObjFile>(file)) {
+      for (Symbol *sym : objFile->symbols) {
+        // TODO: when we implement -dead_strip, we should filter out symbols
+        // that belong to dead sections.
+        if (auto *defined = dyn_cast<Defined>(sym)) {
+          if (!defined->isExternal()) {
+            uint32_t strx = stringTableSection.addString(sym->getName());
+            localSymbols.push_back({sym, strx});
+          }
+        }
+      }
+    }
+  }
+
   for (Symbol *sym : symtab->getSymbols()) {
-    // TODO support other symbol types
-    if (isa<Defined>(sym) || sym->isInGot() || sym->isInStubs()) {
-      sym->symtabIndex = symbols.size();
-      symbols.push_back({sym, stringTableSection.addString(sym->getName())});
+    uint32_t strx = stringTableSection.addString(sym->getName());
+    if (auto *defined = dyn_cast<Defined>(sym)) {
+      assert(defined->isExternal());
+      externalSymbols.push_back({sym, strx});
+    } else if (sym->isInGot() || sym->isInStubs()) {
+      undefinedSymbols.push_back({sym, strx});
     }
+  }
 
+  InputFile *lastFile = nullptr;
+  for (Symbol *sym : symtab->getSymbols()) {
     // Emit STABS symbols so that dsymutil and/or the debugger can map address
     // regions in the final binary to the source and object files from which
     // they originated.
@@ -670,13 +692,35 @@ void SymtabSection::finalizeContents() {
 
   if (!stabs.empty())
     emitEndSourceStab();
+
+  uint32_t symtabIndex = stabs.size();
+  for (const SymtabEntry &entry :
+       concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) {
+    entry.sym->symtabIndex = symtabIndex++;
+  }
+}
+
+uint32_t SymtabSection::getNumSymbols() const {
+  return stabs.size() + localSymbols.size() + externalSymbols.size() +
+         undefinedSymbols.size();
 }
 
 void SymtabSection::writeTo(uint8_t *buf) const {
   auto *nList = reinterpret_cast<structs::nlist_64 *>(buf);
-  for (const SymtabEntry &entry : symbols) {
+  // Emit the stabs entries before the "real" symbols. We cannot emit them
+  // after as that would render Symbol::symtabIndex inaccurate.
+  for (const StabsEntry &entry : stabs) {
+    nList->n_strx = entry.strx;
+    nList->n_type = entry.type;
+    nList->n_sect = entry.sect;
+    nList->n_desc = entry.desc;
+    nList->n_value = entry.value;
+    ++nList;
+  }
+
+  for (const SymtabEntry &entry : concat<const SymtabEntry>(
+           localSymbols, externalSymbols, undefinedSymbols)) {
     nList->n_strx = entry.strx;
-    // TODO support other symbol types
     // TODO populate n_desc with more flags
     if (auto *defined = dyn_cast<Defined>(entry.sym)) {
       if (defined->isAbsolute()) {
@@ -684,7 +728,8 @@ void SymtabSection::writeTo(uint8_t *buf) const {
         nList->n_sect = MachO::NO_SECT;
         nList->n_value = defined->value;
       } else {
-        nList->n_type = MachO::N_EXT | MachO::N_SECT;
+        nList->n_type =
+            (defined->isExternal() ? MachO::N_EXT : 0) | MachO::N_SECT;
         nList->n_sect = defined->isec->parent->index;
         // For the N_SECT symbol type, n_value is the address of the symbol
         nList->n_value = defined->getVA();
@@ -693,17 +738,6 @@ void SymtabSection::writeTo(uint8_t *buf) const {
     }
     ++nList;
   }
-
-  // Emit the stabs entries after the "real" symbols. We cannot emit them
-  // before as that would render Symbol::symtabIndex inaccurate.
-  for (const StabsEntry &entry : stabs) {
-    nList->n_strx = entry.strx;
-    nList->n_type = entry.type;
-    nList->n_sect = entry.sect;
-    nList->n_desc = entry.desc;
-    nList->n_value = entry.value;
-    ++nList;
-  }
 }
 
 IndirectSymtabSection::IndirectSymtabSection()

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 4a8820a285a8..e52bc480e9aa 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -420,11 +420,20 @@ struct StabsEntry {
   explicit StabsEntry(uint8_t type) : type(type) {}
 };
 
+// Symbols of the same type must be laid out contiguously: we choose to emit
+// all local symbols first, then external symbols, and finally undefined
+// symbols. For each symbol type, the LC_DYSYMTAB load command will record the
+// range (start index and total number) of those symbols in the symbol table.
 class SymtabSection : public LinkEditSection {
 public:
   SymtabSection(StringTableSection &);
   void finalizeContents();
-  size_t getNumSymbols() const { return stabs.size() + symbols.size(); }
+  uint32_t getNumSymbols() const;
+  uint32_t getNumLocalSymbols() const {
+    return stabs.size() + localSymbols.size();
+  }
+  uint32_t getNumExternalSymbols() const { return externalSymbols.size(); }
+  uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
   uint64_t getRawSize() const override;
   void writeTo(uint8_t *buf) const override;
 
@@ -435,8 +444,12 @@ class SymtabSection : public LinkEditSection {
   void emitFunStabs(Defined *);
 
   StringTableSection &stringTableSection;
+  // STABS symbols are always local symbols, but we represent them with special
+  // entries because they may use fields like n_sect and n_desc 
diff erently.
   std::vector<StabsEntry> stabs;
-  std::vector<SymtabEntry> symbols;
+  std::vector<SymtabEntry> localSymbols;
+  std::vector<SymtabEntry> externalSymbols;
+  std::vector<SymtabEntry> undefinedSymbols;
 };
 
 // The indirect symbol table is a list of 32-bit integers that serve as indices

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index e9d88af90ecc..39de749e2735 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -113,8 +113,10 @@ class LCDyldInfo : public LoadCommand {
 
 class LCDysymtab : public LoadCommand {
 public:
-  LCDysymtab(IndirectSymtabSection *indirectSymtabSection)
-      : indirectSymtabSection(indirectSymtabSection) {}
+  LCDysymtab(SymtabSection *symtabSection,
+             IndirectSymtabSection *indirectSymtabSection)
+      : symtabSection(symtabSection),
+        indirectSymtabSection(indirectSymtabSection) {}
 
   uint32_t getSize() const override { return sizeof(dysymtab_command); }
 
@@ -122,11 +124,19 @@ class LCDysymtab : public LoadCommand {
     auto *c = reinterpret_cast<dysymtab_command *>(buf);
     c->cmd = LC_DYSYMTAB;
     c->cmdsize = getSize();
+
+    c->ilocalsym = 0;
+    c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols();
+    c->nextdefsym = symtabSection->getNumExternalSymbols();
+    c->iundefsym = c->iextdefsym + c->nextdefsym;
+    c->nundefsym = symtabSection->getNumUndefinedSymbols();
+
     c->indirectsymoff = indirectSymtabSection->fileOff;
     c->nindirectsyms = indirectSymtabSection->getNumSymbols();
   }
 
-  IndirectSymtabSection *indirectSymtabSection = nullptr;
+  SymtabSection *symtabSection;
+  IndirectSymtabSection *indirectSymtabSection;
 };
 
 class LCSegment : public LoadCommand {
@@ -396,7 +406,8 @@ void Writer::createLoadCommands() {
   in.header->addLoadCommand(make<LCDyldInfo>(
       in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports));
   in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection));
-  in.header->addLoadCommand(make<LCDysymtab>(indirectSymtabSection));
+  in.header->addLoadCommand(
+      make<LCDysymtab>(symtabSection, indirectSymtabSection));
   for (StringRef path : config->runtimePaths)
     in.header->addLoadCommand(make<LCRPath>(path));
 

diff  --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s
index 5e85ccc3bc4a..2c49453dcd43 100644
--- a/lld/test/MachO/stabs.s
+++ b/lld/test/MachO/stabs.s
@@ -12,18 +12,18 @@
 # RUN: cd %t && %lld -lSystem test.o foo.o -o test
 # RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t
 
-# CHECK-DAG:  [[#%x, MAIN:]]   T _main
-# CHECK-DAG:  [[#%x, FOO: ]]   T _foo
 # CHECK:      0000000000000000 - 00 0000    SO /tmp/test.cpp
 # CHECK-NEXT: 0000000000000000 - 03 0001   OSO [[DIR]]/test.o
-# CHECK-NEXT: [[#MAIN]]        - 01 0000   FUN _main
+# CHECK-NEXT: [[#%x, MAIN:]]   - 01 0000   FUN _main
 # CHECK-NEXT: 0000000000000001 - 00 0000   FUN
 # CHECK-NEXT: 0000000000000000 - 01 0000    SO
 # CHECK-NEXT: 0000000000000000 - 00 0000    SO /foo.cpp
 # CHECK-NEXT: 0000000000000000 - 03 0001   OSO [[DIR]]/foo.o
-# CHECK-NEXT: [[#FOO]]         - 01 0000   FUN _foo
+# CHECK-NEXT: [[#%x, FOO:]]    - 01 0000   FUN _foo
 # CHECK-NEXT: 0000000000000001 - 00 0000   FUN
 # CHECK-NEXT: 0000000000000000 - 01 0000    SO
+# CHECK-NEXT: [[#MAIN]]        T _main
+# CHECK-NEXT: [[#FOO]]         T _foo
 
 #--- test.s
 .text

diff  --git a/lld/test/MachO/subsections-symbol-relocs.s b/lld/test/MachO/subsections-symbol-relocs.s
index bb5470515b5c..8010a50e7444 100644
--- a/lld/test/MachO/subsections-symbol-relocs.s
+++ b/lld/test/MachO/subsections-symbol-relocs.s
@@ -22,7 +22,7 @@
 # RUN: %lld -o %t/test-2 %t/test.o -order_file %t/order-file-2
 # RUN: llvm-objdump -d --no-show-raw-insn %t/test-2 | FileCheck %s
 # CHECK-LABEL: Disassembly of section __TEXT,__text:
-# CHECK:       <_bar>:
+# CHECK:       <_ba{{r|z}}>:
 # CHECK-NEXT:    callq {{.*}} <_foo>
 # CHECK-EMPTY:
 # CHECK-NEXT:  <_qux>:
@@ -30,7 +30,7 @@
 # CHECK:       <_foo>:
 # CHECK-NEXT:    retq
 # CHECK:       <_main>:
-# CHECK-NEXT:    callq {{.*}} <_bar>
+# CHECK-NEXT:    callq {{.*}} <_ba{{r|z}}>
 # CHECK-NEXT:    movq $0, %rax
 # CHECK-NEXT:    retq
 

diff  --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s
index 1e9ec157193d..22dc80ad62f3 100644
--- a/lld/test/MachO/symtab.s
+++ b/lld/test/MachO/symtab.s
@@ -1,56 +1,108 @@
 # REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
-# RUN: %lld -o %t %t.o
-# RUN: llvm-readobj -symbols %t | FileCheck %s
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o
+# RUN: %lld -dylib %t/libfoo.o -o %t/libfoo.dylib
+# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib -o %t/test
+# RUN: llvm-readobj --syms --macho-dysymtab %t/test | FileCheck %s
 
 # CHECK:      Symbols [
 # CHECK-NEXT:   Symbol {
-# CHECK-NEXT:     Name: _main
+# CHECK-NEXT:     Name: _local (1)
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __data (0x4)
+# CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value: 0x1{{[0-9a-f]*}}
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: _main (8)
 # CHECK-NEXT:     Extern
 # CHECK-NEXT:     Type: Section (0xE)
 # CHECK-NEXT:     Section: __text (0x1)
-# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value: 0x1{{[0-9a-f]*}}
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: _external (54)
+# CHECK-NEXT:     Extern
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __data (0x4)
+# CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
 # CHECK-NEXT:     Flags [ (0x0)
 # CHECK-NEXT:     ]
-# CHECK-NEXT:     Value:
+# CHECK-NEXT:     Value: 0x1{{[0-9a-f]*}}
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Symbol {
-# CHECK-NEXT:     Name: bar
+# CHECK-NEXT:     Name: _external_weak (64)
 # CHECK-NEXT:     Extern
 # CHECK-NEXT:     Type: Section (0xE)
 # CHECK-NEXT:     Section: __text (0x1)
-# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
 # CHECK-NEXT:     Flags [ (0x80)
 # CHECK-NEXT:       WeakDef (0x80)
 # CHECK-NEXT:     ]
-# CHECK-NEXT:     Value:
+# CHECK-NEXT:     Value: 0x1{{[0-9a-f]*}}
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Symbol {
-# CHECK-NEXT:     Name: foo
+# CHECK-NEXT:     Name: __dyld_private (102)
 # CHECK-NEXT:     Extern
 # CHECK-NEXT:     Type: Section (0xE)
-# CHECK-NEXT:     Section: __data
-# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     Section: __data (0x4)
+# CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
 # CHECK-NEXT:     Flags [ (0x0)
 # CHECK-NEXT:     ]
-# CHECK-NEXT:     Value:
+# CHECK-NEXT:     Value: 0x1{{[0-9a-f]*}}
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: dyld_stub_binder (14)
+# CHECK-NEXT:     Type: Undef (0x0)
+# CHECK-NEXT:     Section:  (0x0)
+# CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value: 0x0
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: _dynamic (79)
+# CHECK-NEXT:     Type: Undef (0x0)
+# CHECK-NEXT:     Section:  (0x0)
+# CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value: 0x0
 # CHECK-NEXT:   }
 # CHECK-NEXT: ]
+# CHECK-NEXT: Dysymtab {
+# CHECK-NEXT:   ilocalsym: 0
+# CHECK-NEXT:   nlocalsym: 1
+# CHECK-NEXT:   iextdefsym: 1
+# CHECK-NEXT:   nextdefsym: 4
+# CHECK-NEXT:   iundefsym: 5
+# CHECK-NEXT:   nundefsym: 2
+
+#--- libfoo.s
+.globl _dynamic
+_dynamic:
+
+#--- test.s
+.globl _main, _external, _external_weak
 
 .data
-.global foo
-foo:
-  .asciz "Hello world!\n"
+_external:
+  .space 0
+_local:
+  .space 0
 
 .text
-.weak_definition bar
-.global bar
-.global _main
+.weak_definition _external_weak
+_external_weak:
+  .space 0
 
 _main:
+  callq _dynamic
   mov $0, %rax
   ret
-
-bar:
-  mov $2, %rax
-  ret

diff  --git a/lld/test/MachO/tlv.s b/lld/test/MachO/tlv.s
index cf472bcfb14e..bf0dfcd02b56 100644
--- a/lld/test/MachO/tlv.s
+++ b/lld/test/MachO/tlv.s
@@ -16,10 +16,13 @@
 # CHECK-EMPTY:
 # CHECK-NEXT:  Disassembly of section __DATA,__thread_data:
 # CHECK-EMPTY:
-# CHECK-NEXT:  <__thread_data>:
-# CHECK-NEXT:  ef
-# CHECK-NEXT:  be ad de be ba
-# CHECK-NEXT:  fe ca
+# CHECK-NEXT:  <_foo$tlv$init>:
+# CHECK-NEXT:  00 00
+# CHECK-NEXT:  00 00
+# CHECK-EMPTY:
+# CHECK-NEXT:  <_bar$tlv$init>:
+# CHECK-NEXT:  00 00
+# CHECK-NEXT:  00 00
 # CHECK-EMPTY:
 # CHECK-NEXT:  Disassembly of section __DATA,__thread_vars:
 # CHECK-EMPTY:
@@ -41,9 +44,9 @@ _main:
 
 .section	__DATA,__thread_data,thread_local_regular
 _foo$tlv$init:
-  .long	0xdeadbeef
+  .space 4
 _bar$tlv$init:
-  .long	0xcafebabe
+  .space 4
 
 .section	__DATA,__thread_vars,thread_local_variables
 .globl	_foo, _bar


        


More information about the llvm-branch-commits mailing list