[lld] a3d95a5 - [lld-macho] Add basic symbol table output

Shoaib Meenai via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 27 13:35:27 PDT 2020


Author: Jez Ng
Date: 2020-04-27T13:33:15-07:00
New Revision: a3d95a50ee3397c89327fce6983624446cd8e9d7

URL: https://github.com/llvm/llvm-project/commit/a3d95a50ee3397c89327fce6983624446cd8e9d7
DIFF: https://github.com/llvm/llvm-project/commit/a3d95a50ee3397c89327fce6983624446cd8e9d7.diff

LOG: [lld-macho] Add basic symbol table output

This diff implements basic support for writing a symbol table.

- Attributes are loosely supported for extern symbols and not at all for
  other types

Immediate future work will involve implementing section merging.

Initial version by Kellie Medlin <kelliem at fb.com>

Differential Revision: https://reviews.llvm.org/D76742

Added: 
    lld/test/MachO/symtab.s

Modified: 
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index d4af5a706a80..5f08547aa127 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -9,6 +9,7 @@
 #include "SyntheticSections.h"
 #include "InputFiles.h"
 #include "OutputSegment.h"
+#include "SymbolTable.h"
 #include "Symbols.h"
 #include "Writer.h"
 
@@ -128,6 +129,59 @@ void BindingSection::writeTo(uint8_t *buf) {
   memcpy(buf, contents.data(), contents.size());
 }
 
+SymtabSection::SymtabSection(StringTableSection &stringTableSection)
+    : stringTableSection(stringTableSection) {
+  segname = segment_names::linkEdit; // grouped with the other linkEdit data
+  name = section_names::symbolTable; // "__symbol_table"
+}
+
+size_t SymtabSection::getSize() const { // one nlist_64 per stored symbol
+  return symbols.size() * sizeof(nlist_64);
+}
+
+void SymtabSection::finalizeContents() { // gather symbols and name offsets
+  // TODO: We should filter out some symbols; for now every entry returned by
+  for (Symbol *sym : symtab->getSymbols()) // getSymbols() is recorded.
+    symbols.push_back({sym, stringTableSection.addString(sym->getName())});
+}
+
+void SymtabSection::writeTo(uint8_t *buf) { // one nlist_64 slot per entry
+  auto *nList = reinterpret_cast<nlist_64 *>(buf);
+  for (const SymtabEntry &entry : symbols) {
+    // TODO support other symbol types (non-Defined slots are left unwritten)
+    // TODO populate n_desc
+    if (auto defined = dyn_cast<Defined>(entry.sym)) {
+      nList->n_strx = entry.strx; // offset of the name in the string table
+      nList->n_type = N_EXT | N_SECT; // every Defined symbol gets both flags
+      nList->n_sect = defined->isec->sectionIndex;
+      // For the N_SECT symbol type, n_value is the address of the symbol
+      nList->n_value = defined->value + defined->isec->addr;
+    }
+
+    ++nList; // advance even for skipped entries: slot i matches symbols[i]
+  }
+}
+
+StringTableSection::StringTableSection() {
+  segname = segment_names::linkEdit; // grouped with the other linkEdit data
+  name = section_names::stringTable; // "__string_table"
+}
+
+uint32_t StringTableSection::addString(StringRef str) { // returns its offset
+  uint32_t strx = size; // str starts at the current end of the table
+  strings.push_back(str); // NOTE(review): keeps the StringRef only; verify lifetime
+  size += str.size() + 1; // account for null terminator
+  return strx;
+}
+
+void StringTableSection::writeTo(uint8_t *buf) { // strings laid back to back
+  uint32_t off = 0;
+  for (StringRef str : strings) {
+    memcpy(buf + off, str.data(), str.size()); // the NUL byte is not copied
+    off += str.size() + 1; // account for null terminator (skipped, not written; assumes buf is zeroed - TODO confirm)
+  }
+}
+
 InStruct in;
 
 } // namespace macho

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 2adc5754a7e8..3988772906ec 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -23,6 +23,8 @@ namespace section_names {
 constexpr const char *pageZero = "__pagezero";
 constexpr const char *header = "__mach_header";
 constexpr const char *binding = "__binding";
+constexpr const char *symbolTable = "__symbol_table";
+constexpr const char *stringTable = "__string_table";
 
 } // namespace section_names
 
@@ -93,6 +95,49 @@ class BindingSection : public InputSection {
   SmallVector<char, 128> contents;
 };
 
+// Stores the strings referenced by the symbol table.
+class StringTableSection : public InputSection {
+public:
+  StringTableSection();
+  // Returns the start offset of the added string.
+  uint32_t addString(StringRef);
+  size_t getSize() const override { return size; } // includes terminators
+  // Like other sections in __LINKEDIT, the string table section is special: its
+  // offsets are recorded in the LC_SYMTAB load command, instead of in section
+  // headers.
+  bool isHidden() const override { return true; }
+  void writeTo(uint8_t *buf) override;
+
+private:
+  // An n_strx value of 0 always indicates the empty string, so we must locate
+  // our non-empty string values at positive offsets in the string table.
+  // Therefore we insert a dummy value at position zero.
+  std::vector<StringRef> strings{"\0"};
+  size_t size = 1; // offset 0 is already taken by the dummy entry
+};
+
+struct SymtabEntry { // a symbol paired with the location of its name
+  Symbol *sym;
+  size_t strx; // offset returned by StringTableSection::addString
+};
+
+class SymtabSection : public InputSection {
+public:
+  SymtabSection(StringTableSection &); // table that receives symbol names
+  void finalizeContents(); // fills `symbols` and the string table
+  size_t getNumSymbols() const { return symbols.size(); } // LC_SYMTAB nsyms
+  size_t getSize() const override;
+  // Like other sections in __LINKEDIT, the symtab section is special: its
+  // offsets are recorded in the LC_SYMTAB load command, instead of in section
+  // headers.
+  bool isHidden() const override { return true; }
+  void writeTo(uint8_t *buf) override;
+
+private:
+  StringTableSection &stringTableSection;
+  std::vector<SymtabEntry> symbols; // writeTo emits entries in this order
+};
+
 struct InStruct {
   GotSection *got = nullptr;
 };

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index b6e5ed0c5e4d..7041df2d6e82 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -52,6 +52,8 @@ class Writer {
   uint64_t fileOff = 0;
   MachHeaderSection *headerSection = nullptr;
   BindingSection *bindingSection = nullptr;
+  SymtabSection *symtabSection = nullptr;
+  StringTableSection *stringTableSection = nullptr;
 };
 
 // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@@ -163,13 +165,23 @@ class LCMain : public LoadCommand {
 
 class LCSymtab : public LoadCommand {
 public:
+  LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection)
+      : symtabSection(symtabSection), stringTableSection(stringTableSection) {}
+
   uint32_t getSize() const override { return sizeof(symtab_command); }
 
   void writeTo(uint8_t *buf) const override {
     auto *c = reinterpret_cast<symtab_command *>(buf);
     c->cmd = LC_SYMTAB;
     c->cmdsize = getSize();
+    c->symoff = symtabSection->getFileOffset();
+    c->nsyms = symtabSection->getNumSymbols(); // number of symtab entries
+    c->stroff = stringTableSection->getFileOffset();
+    c->strsize = stringTableSection->getFileSize();
   }
+
+  SymtabSection *symtabSection = nullptr; // source of symoff / nsyms
+  StringTableSection *stringTableSection = nullptr; // source of stroff / strsize
 };
 
 class LCLoadDylib : public LoadCommand {
@@ -238,7 +250,12 @@ class SectionComparator {
         {defaultPosition, {}},
         // Make sure __LINKEDIT is the last segment (i.e. all its hidden
         // sections must be ordered after other sections).
-        {segment_names::linkEdit, {section_names::binding}},
+        {segment_names::linkEdit,
+         {
+             section_names::binding,
+             section_names::symbolTable,
+             section_names::stringTable,
+         }},
     };
 
     for (uint32_t i = 0, n = ordering.size(); i < n; ++i) {
@@ -294,7 +311,8 @@ void Writer::scanRelocations() {
 void Writer::createLoadCommands() {
   headerSection->addLoadCommand(make<LCDyldInfo>(bindingSection));
   headerSection->addLoadCommand(make<LCLoadDylinker>());
-  headerSection->addLoadCommand(make<LCSymtab>());
+  headerSection->addLoadCommand(
+      make<LCSymtab>(symtabSection, stringTableSection));
   headerSection->addLoadCommand(make<LCDysymtab>());
   headerSection->addLoadCommand(make<LCMain>());
 
@@ -323,6 +341,8 @@ void Writer::createLoadCommands() {
 void Writer::createHiddenSections() {
   headerSection = createInputSection<MachHeaderSection>();
   bindingSection = createInputSection<BindingSection>();
+  stringTableSection = createInputSection<StringTableSection>();
+  symtabSection = createInputSection<SymtabSection>(*stringTableSection);
   createInputSection<PageZeroSection>();
 }
 
@@ -405,6 +425,7 @@ void Writer::run() {
 
   // Fill __LINKEDIT contents.
   bindingSection->finalizeContents();
+  symtabSection->finalizeContents();
 
   // Now that __LINKEDIT is filled out, do a proper calculation of its
   // addresses and offsets. We don't have to recalculate the other segments

diff  --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s
new file mode 100644
index 000000000000..44a016912bd6
--- /dev/null
+++ b/lld/test/MachO/symtab.s
@@ -0,0 +1,54 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: lld -flavor darwinnew -o %t %t.o
+# RUN: llvm-readobj -symbols %t | FileCheck %s
+
+# CHECK:      Symbols [
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: _main
+# CHECK-NEXT:     Extern
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __text (0x1)
+# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value:
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: bar
+# CHECK-NEXT:     Extern
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __text (0x1)
+# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value:
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: foo
+# CHECK-NEXT:     Extern
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __data
+# CHECK-NEXT:     RefType:
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value:
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]
+
+.data
+.global foo
+foo:
+  .asciz "Hello world!\n"
+
+.text
+.global bar
+.global _main
+
+_main:
+  mov $0, %rax
+  ret
+
+bar:
+  mov $2, %rax
+  ret


        


More information about the llvm-commits mailing list