[lld] 5d26bd3 - [lld-macho] Emit indirect symbol table

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 23 19:26:55 PDT 2020


Author: Jez Ng
Date: 2020-09-23T19:26:40-07:00
New Revision: 5d26bd3b75538515194d9e54c76e723524c1ac18

URL: https://github.com/llvm/llvm-project/commit/5d26bd3b75538515194d9e54c76e723524c1ac18
DIFF: https://github.com/llvm/llvm-project/commit/5d26bd3b75538515194d9e54c76e723524c1ac18.diff

LOG: [lld-macho] Emit indirect symbol table

Makes it a little easier to read objdump's disassembly.

Reviewed By: #lld-macho, gkm

Differential Revision: https://reviews.llvm.org/D87178

Added: 
    lld/test/MachO/indirect-symtab.s

Modified: 
    lld/MachO/OutputSection.h
    lld/MachO/Symbols.h
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/OutputSection.h b/lld/MachO/OutputSection.h
index 07b53a04639f4..c526a8343afe1 100644
--- a/lld/MachO/OutputSection.h
+++ b/lld/MachO/OutputSection.h
@@ -63,6 +63,8 @@ class OutputSection {
   uint64_t fileOff = 0;
   uint32_t align = 1;
   uint32_t flags = 0;
+  uint32_t reserved1 = 0;
+  uint32_t reserved2 = 0;
 
 private:
   Kind sectionKind;

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 8e1b9c62e92e9..14c1ee813420a 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -72,6 +72,8 @@ class Symbol {
 
   uint32_t stubsIndex = UINT32_MAX;
 
+  uint32_t symtabIndex = UINT32_MAX;
+
 protected:
   Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {}
 

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 2c8065cabee04..ae288955862a3 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -296,7 +296,10 @@ void macho::addNonLazyBindingEntries(const Symbol *sym,
 }
 
 StubsSection::StubsSection()
-    : SyntheticSection(segment_names::text, "__stubs") {}
+    : SyntheticSection(segment_names::text, "__stubs") {
+  flags = MachO::S_SYMBOL_STUBS;
+  reserved2 = target->stubSize;
+}
 
 uint64_t StubsSection::getSize() const {
   return entries.size() * target->stubSize;
@@ -464,9 +467,12 @@ uint64_t SymtabSection::getRawSize() const {
 
 void SymtabSection::finalizeContents() {
   // TODO support other symbol types
-  for (Symbol *sym : symtab->getSymbols())
-    if (isa<Defined>(sym))
+  for (Symbol *sym : symtab->getSymbols()) {
+    if (isa<Defined>(sym) || sym->isInGot() || sym->isInStubs()) {
+      sym->symtabIndex = symbols.size();
       symbols.push_back({sym, stringTableSection.addString(sym->getName())});
+    }
+  }
 }
 
 void SymtabSection::writeTo(uint8_t *buf) const {
@@ -486,6 +492,47 @@ void SymtabSection::writeTo(uint8_t *buf) const {
   }
 }
 
+IndirectSymtabSection::IndirectSymtabSection()
+    : LinkEditSection(segment_names::linkEdit,
+                      section_names::indirectSymbolTable) {}
+
+uint32_t IndirectSymtabSection::getNumSymbols() const {
+  return in.got->getEntries().size() + in.tlvPointers->getEntries().size() +
+         in.stubs->getEntries().size();
+}
+
+bool IndirectSymtabSection::isNeeded() const {
+  return in.got->isNeeded() || in.tlvPointers->isNeeded() ||
+         in.stubs->isNeeded();
+}
+
+void IndirectSymtabSection::finalizeContents() {
+  uint32_t off = 0;
+  in.got->reserved1 = off;
+  off += in.got->getEntries().size();
+  in.tlvPointers->reserved1 = off;
+  off += in.tlvPointers->getEntries().size();
+  // There is a 1:1 correspondence between stubs and LazyPointerSection
+  // entries, so they can share the same sub-array in the table.
+  in.stubs->reserved1 = in.lazyPointers->reserved1 = off;
+}
+
+void IndirectSymtabSection::writeTo(uint8_t *buf) const {
+  uint32_t off = 0;
+  for (const Symbol *sym : in.got->getEntries()) {
+    write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
+    ++off;
+  }
+  for (const Symbol *sym : in.tlvPointers->getEntries()) {
+    write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
+    ++off;
+  }
+  for (const Symbol *sym : in.stubs->getEntries()) {
+    write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
+    ++off;
+  }
+}
+
 StringTableSection::StringTableSection()
     : LinkEditSection(segment_names::linkEdit, section_names::stringTable) {}
 

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 03bc216e206dc..55fd15ea8aff6 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -33,6 +33,7 @@ constexpr const char weakBinding[] = "__weak_binding";
 constexpr const char lazyBinding[] = "__lazy_binding";
 constexpr const char export_[] = "__export";
 constexpr const char symbolTable[] = "__symbol_table";
+constexpr const char indirectSymbolTable[] = "__ind_sym_tab";
 constexpr const char stringTable[] = "__string_table";
 constexpr const char got[] = "__got";
 constexpr const char threadPtrs[] = "__thread_ptrs";
@@ -391,6 +392,28 @@ class SymtabSection : public LinkEditSection {
   std::vector<SymtabEntry> symbols;
 };
 
+// The indirect symbol table is a list of 32-bit integers that serve as indices
+// into the (actual) symbol table. The indirect symbol table is a
+// concatenation of several sub-arrays of indices, each sub-array belonging to
+// a separate section. The starting offset of each sub-array is stored in the
+// reserved1 header field of the respective section.
+//
+// These sub-arrays provide symbol information for sections that store
+// contiguous sequences of symbol references. These references can be pointers
+// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
+// function stubs).
+class IndirectSymtabSection : public LinkEditSection {
+public:
+  IndirectSymtabSection();
+  void finalizeContents();
+  uint32_t getNumSymbols() const;
+  uint64_t getRawSize() const override {
+    return getNumSymbols() * sizeof(uint32_t);
+  }
+  bool isNeeded() const override;
+  void writeTo(uint8_t *buf) const override;
+};
+
 struct InStruct {
   MachHeaderSection *header = nullptr;
   BindingSection *binding = nullptr;

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 86bbab20ffc03..1f1c34ca0bed5 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -60,6 +60,7 @@ class Writer {
   MachHeaderSection *header = nullptr;
   StringTableSection *stringTableSection = nullptr;
   SymtabSection *symtabSection = nullptr;
+  IndirectSymtabSection *indirectSymtabSection = nullptr;
   UnwindInfoSection *unwindInfoSection = nullptr;
 };
 
@@ -105,13 +106,20 @@ class LCDyldInfo : public LoadCommand {
 
 class LCDysymtab : public LoadCommand {
 public:
+  LCDysymtab(IndirectSymtabSection *indirectSymtabSection)
+      : indirectSymtabSection(indirectSymtabSection) {}
+
   uint32_t getSize() const override { return sizeof(dysymtab_command); }
 
   void writeTo(uint8_t *buf) const override {
     auto *c = reinterpret_cast<dysymtab_command *>(buf);
     c->cmd = LC_DYSYMTAB;
     c->cmdsize = getSize();
+    c->indirectsymoff = indirectSymtabSection->fileOff;
+    c->nindirectsyms = indirectSymtabSection->getNumSymbols();
   }
+
+  IndirectSymtabSection *indirectSymtabSection = nullptr;
 };
 
 class LCSegment : public LoadCommand {
@@ -163,6 +171,8 @@ class LCSegment : public LoadCommand {
       sectHdr->align = Log2_32(osec->align);
       sectHdr->flags = osec->flags;
       sectHdr->size = osec->getSize();
+      sectHdr->reserved1 = osec->reserved1;
+      sectHdr->reserved2 = osec->reserved2;
     }
   }
 
@@ -339,7 +349,7 @@ void Writer::createLoadCommands() {
   in.header->addLoadCommand(
       make<LCDyldInfo>(in.binding, in.weakBinding, in.lazyBinding, in.exports));
   in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection));
-  in.header->addLoadCommand(make<LCDysymtab>());
+  in.header->addLoadCommand(make<LCDysymtab>(indirectSymtabSection));
   for (StringRef path : config->runtimePaths)
     in.header->addLoadCommand(make<LCRPath>(path));
 
@@ -438,11 +448,12 @@ static int sectionOrder(OutputSection *osec) {
         .Default(0);
   } else if (segname == segment_names::linkEdit) {
     return StringSwitch<int>(osec->name)
-        .Case(section_names::binding, -6)
-        .Case(section_names::weakBinding, -5)
-        .Case(section_names::lazyBinding, -4)
-        .Case(section_names::export_, -3)
-        .Case(section_names::symbolTable, -2)
+        .Case(section_names::binding, -7)
+        .Case(section_names::weakBinding, -6)
+        .Case(section_names::lazyBinding, -5)
+        .Case(section_names::export_, -4)
+        .Case(section_names::symbolTable, -3)
+        .Case(section_names::indirectSymbolTable, -2)
         .Case(section_names::stringTable, -1)
         .Default(0);
   }
@@ -494,6 +505,7 @@ void Writer::createOutputSections() {
   stringTableSection = make<StringTableSection>();
   unwindInfoSection = make<UnwindInfoSection>(); // TODO(gkm): only when no -r
   symtabSection = make<SymtabSection>(*stringTableSection);
+  indirectSymtabSection = make<IndirectSymtabSection>();
 
   switch (config->outputType) {
   case MH_EXECUTE:
@@ -614,6 +626,7 @@ void Writer::run() {
   in.lazyBinding->finalizeContents();
   in.exports->finalizeContents();
   symtabSection->finalizeContents();
+  indirectSymtabSection->finalizeContents();
 
   // Now that __LINKEDIT is filled out, do a proper calculation of its
   // addresses and offsets.

diff  --git a/lld/test/MachO/indirect-symtab.s b/lld/test/MachO/indirect-symtab.s
new file mode 100644
index 0000000000000..515fd34bf642d
--- /dev/null
+++ b/lld/test/MachO/indirect-symtab.s
@@ -0,0 +1,60 @@
+# REQUIRES: x86
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: lld -flavor darwinnew -dylib %t/libfoo.o -o %t/libfoo.dylib -syslibroot %S/Inputs/MacOSX.sdk -lSystem
+# RUN: lld -flavor darwinnew %t/test.o %t/libfoo.dylib -o %t/test -syslibroot %S/Inputs/MacOSX.sdk -lSystem
+# RUN: llvm-objdump --macho -d --no-show-raw-insn --indirect-symbols %t/test | FileCheck %s
+
+# CHECK:      (__TEXT,__text) section
+# CHECK-NEXT: _main:
+# CHECK-NEXT: movq	{{.*}}(%rip), %rax ## literal pool symbol address: _foo
+# CHECK-NEXT: movq	{{.*}}(%rip), %rax ## literal pool symbol address: _bar
+# CHECK-NEXT: movq	{{.*}}(%rip), %rax ## literal pool symbol address: _foo_tlv
+# CHECK-NEXT: movq	{{.*}}(%rip), %rax ## literal pool symbol address: _bar_tlv
+# CHECK-NEXT: callq	{{.*}} ## symbol stub for: _foo_fn
+# CHECK-NEXT: callq	{{.*}} ## symbol stub for: _bar_fn
+# CHECK-NEXT: retq
+
+# CHECK:      Indirect symbols for (__TEXT,__stubs) 2 entries
+# CHECK-NEXT: address            index name
+# CHECK-NEXT: _bar_fn
+# CHECK-NEXT: _foo_fn
+# CHECK-NEXT: Indirect symbols for (__DATA,__thread_ptrs) 2 entries
+# CHECK-NEXT: address            index name
+# CHECK-NEXT: _bar_tlv
+# CHECK-NEXT: _foo_tlv
+# CHECK-NEXT: Indirect symbols for (__DATA,__la_symbol_ptr) 2 entries
+# CHECK-NEXT: address            index name
+# CHECK-NEXT: _bar_fn
+# CHECK-NEXT: _foo_fn
+# CHECK-NEXT: Indirect symbols for (__DATA_CONST,__got) 3 entries
+# CHECK-NEXT: address            index name
+# CHECK-NEXT: _bar
+# CHECK-NEXT: _foo
+# CHECK-NEXT: _stub_binder
+
+#--- libfoo.s
+
+.globl _foo, _foo_fn, _bar, _bar_fn
+_foo:
+_foo_fn:
+_bar:
+_bar_fn:
+
+.section  __DATA,__thread_vars,thread_local_variables
+.globl _foo_tlv, _bar_tlv
+_foo_tlv:
+_bar_tlv:
+
+#--- test.s
+
+.globl _main
+_main:
+  movq _foo@GOTPCREL(%rip), %rax
+  movq _bar@GOTPCREL(%rip), %rax
+  mov _foo_tlv@TLVP(%rip), %rax
+  mov _bar_tlv@TLVP(%rip), %rax
+  callq _foo_fn
+  callq _bar_fn
+  ret


        


More information about the llvm-commits mailing list