[lld] 4f0cccd - [lld-macho][reland] Add basic symbol table output
Shoaib Meenai via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 28 17:09:01 PDT 2020
Author: Jez Ng
Date: 2020-04-28T17:07:06-07:00
New Revision: 4f0cccdd7a06ff60d3271638f47082b65f3793f1
URL: https://github.com/llvm/llvm-project/commit/4f0cccdd7a06ff60d3271638f47082b65f3793f1
DIFF: https://github.com/llvm/llvm-project/commit/4f0cccdd7a06ff60d3271638f47082b65f3793f1.diff
LOG: [lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem at fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
Added:
lld/test/MachO/symtab.s
Modified:
lld/MachO/SyntheticSections.cpp
lld/MachO/SyntheticSections.h
lld/MachO/Writer.cpp
Removed:
################################################################################
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index d4af5a706a80..df963e24d6c2 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -9,6 +9,7 @@
#include "SyntheticSections.h"
#include "InputFiles.h"
#include "OutputSegment.h"
+#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
@@ -128,6 +129,62 @@ void BindingSection::writeTo(uint8_t *buf) {
memcpy(buf, contents.data(), contents.size());
}
+SymtabSection::SymtabSection(StringTableSection &stringTableSection)
+ : stringTableSection(stringTableSection) {
+ segname = segment_names::linkEdit;
+ name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
+}
+
+size_t SymtabSection::getSize() const {
+ return symbols.size() * sizeof(nlist_64);
+}
+
+void SymtabSection::finalizeContents() {
+ // TODO: We should filter out some symbols.
+ for (Symbol *sym : symtab->getSymbols())
+ symbols.push_back({sym, stringTableSection.addString(sym->getName())});
+}
+
+void SymtabSection::writeTo(uint8_t *buf) {
+ auto *nList = reinterpret_cast<nlist_64 *>(buf);
+ for (const SymtabEntry &entry : symbols) {
+ // TODO support other symbol types
+ // TODO populate n_desc
+ if (auto defined = dyn_cast<Defined>(entry.sym)) {
+ nList->n_strx = entry.strx;
+ nList->n_type = N_EXT | N_SECT;
+ nList->n_sect = defined->isec->sectionIndex;
+ // For the N_SECT symbol type, n_value is the address of the symbol
+ nList->n_value = defined->value + defined->isec->addr;
+ }
+
+ ++nList;
+ }
+}
+
+StringTableSection::StringTableSection() {
+ segname = segment_names::linkEdit;
+ name = section_names::stringTable;
+}
+
+uint32_t StringTableSection::addString(StringRef str) {
+ uint32_t strx = size;
+ strings.push_back(str);
+ size += str.size() + 1; // account for null terminator
+ return strx;
+}
+
+void StringTableSection::writeTo(uint8_t *buf) {
+ uint32_t off = 0;
+ for (StringRef str : strings) {
+ memcpy(buf + off, str.data(), str.size());
+ off += str.size() + 1; // account for null terminator
+ }
+}
+
InStruct in;
} // namespace macho
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 2adc5754a7e8..3988772906ec 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -23,6 +23,8 @@ namespace section_names {
constexpr const char *pageZero = "__pagezero";
constexpr const char *header = "__mach_header";
constexpr const char *binding = "__binding";
+constexpr const char *symbolTable = "__symbol_table";
+constexpr const char *stringTable = "__string_table";
} // namespace section_names
@@ -93,6 +95,49 @@ class BindingSection : public InputSection {
SmallVector<char, 128> contents;
};
+// Stores the strings referenced by the symbol table.
+class StringTableSection : public InputSection {
+public:
+ StringTableSection();
+ // Returns the start offset of the added string.
+ uint32_t addString(StringRef);
+ size_t getSize() const override { return size; }
+ // Like other sections in __LINKEDIT, the string table section is special: its
+ // offsets are recorded in the LC_SYMTAB load command, instead of in section
+ // headers.
+ bool isHidden() const override { return true; }
+ void writeTo(uint8_t *buf) override;
+
+private:
+ // An n_strx value of 0 always indicates the empty string, so we must locate
+ // our non-empty string values at positive offsets in the string table.
+ // Therefore we insert a dummy value at position zero.
+ std::vector<StringRef> strings{"\0"};
+ size_t size = 1;
+};
+
+struct SymtabEntry {
+ Symbol *sym;
+ size_t strx;
+};
+
+class SymtabSection : public InputSection {
+public:
+ SymtabSection(StringTableSection &);
+ void finalizeContents();
+ size_t getNumSymbols() const { return symbols.size(); }
+ size_t getSize() const override;
+ // Like other sections in __LINKEDIT, the symtab section is special: its
+ // offsets are recorded in the LC_SYMTAB load command, instead of in section
+ // headers.
+ bool isHidden() const override { return true; }
+ void writeTo(uint8_t *buf) override;
+
+private:
+ StringTableSection &stringTableSection;
+ std::vector<SymtabEntry> symbols;
+};
+
struct InStruct {
GotSection *got = nullptr;
};
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index b6e5ed0c5e4d..1b8ce5f2ff56 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -52,6 +52,8 @@ class Writer {
uint64_t fileOff = 0;
MachHeaderSection *headerSection = nullptr;
BindingSection *bindingSection = nullptr;
+ SymtabSection *symtabSection = nullptr;
+ StringTableSection *stringTableSection = nullptr;
};
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@@ -163,13 +165,23 @@ class LCMain : public LoadCommand {
class LCSymtab : public LoadCommand {
public:
+ LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection)
+ : symtabSection(symtabSection), stringTableSection(stringTableSection) {}
+
uint32_t getSize() const override { return sizeof(symtab_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<symtab_command *>(buf);
c->cmd = LC_SYMTAB;
c->cmdsize = getSize();
+ c->symoff = symtabSection->getFileOffset();
+ c->nsyms = symtabSection->getNumSymbols();
+ c->stroff = stringTableSection->getFileOffset();
+ c->strsize = stringTableSection->getFileSize();
}
+
+ SymtabSection *symtabSection = nullptr;
+ StringTableSection *stringTableSection = nullptr;
};
class LCLoadDylib : public LoadCommand {
@@ -238,7 +250,12 @@ class SectionComparator {
{defaultPosition, {}},
// Make sure __LINKEDIT is the last segment (i.e. all its hidden
// sections must be ordered after other sections).
- {segment_names::linkEdit, {section_names::binding}},
+ {segment_names::linkEdit,
+ {
+ section_names::binding,
+ section_names::symbolTable,
+ section_names::stringTable,
+ }},
};
for (uint32_t i = 0, n = ordering.size(); i < n; ++i) {
@@ -294,7 +311,8 @@ void Writer::scanRelocations() {
void Writer::createLoadCommands() {
headerSection->addLoadCommand(make<LCDyldInfo>(bindingSection));
headerSection->addLoadCommand(make<LCLoadDylinker>());
- headerSection->addLoadCommand(make<LCSymtab>());
+ headerSection->addLoadCommand(
+ make<LCSymtab>(symtabSection, stringTableSection));
headerSection->addLoadCommand(make<LCDysymtab>());
headerSection->addLoadCommand(make<LCMain>());
@@ -323,6 +341,8 @@ void Writer::createLoadCommands() {
void Writer::createHiddenSections() {
headerSection = createInputSection<MachHeaderSection>();
bindingSection = createInputSection<BindingSection>();
+ stringTableSection = createInputSection<StringTableSection>();
+ symtabSection = createInputSection<SymtabSection>(*stringTableSection);
createInputSection<PageZeroSection>();
}
@@ -351,6 +371,9 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -376,6 +399,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
@@ -405,6 +429,7 @@ void Writer::run() {
// Fill __LINKEDIT contents.
bindingSection->finalizeContents();
+ symtabSection->finalizeContents();
// Now that __LINKEDIT is filled out, do a proper calculation of its
// addresses and offsets. We don't have to recalculate the other segments
diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s
new file mode 100644
index 000000000000..44a016912bd6
--- /dev/null
+++ b/lld/test/MachO/symtab.s
@@ -0,0 +1,54 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: lld -flavor darwinnew -o %t %t.o
+# RUN: llvm-readobj -symbols %t | FileCheck %s
+
+# CHECK: Symbols [
+# CHECK-NEXT: Symbol {
+# CHECK-NEXT: Name: _main
+# CHECK-NEXT: Extern
+# CHECK-NEXT: Type: Section (0xE)
+# CHECK-NEXT: Section: __text (0x1)
+# CHECK-NEXT: RefType:
+# CHECK-NEXT: Flags [ (0x0)
+# CHECK-NEXT: ]
+# CHECK-NEXT: Value:
+# CHECK-NEXT: }
+# CHECK-NEXT: Symbol {
+# CHECK-NEXT: Name: bar
+# CHECK-NEXT: Extern
+# CHECK-NEXT: Type: Section (0xE)
+# CHECK-NEXT: Section: __text (0x1)
+# CHECK-NEXT: RefType:
+# CHECK-NEXT: Flags [ (0x0)
+# CHECK-NEXT: ]
+# CHECK-NEXT: Value:
+# CHECK-NEXT: }
+# CHECK-NEXT: Symbol {
+# CHECK-NEXT: Name: foo
+# CHECK-NEXT: Extern
+# CHECK-NEXT: Type: Section (0xE)
+# CHECK-NEXT: Section: __data
+# CHECK-NEXT: RefType:
+# CHECK-NEXT: Flags [ (0x0)
+# CHECK-NEXT: ]
+# CHECK-NEXT: Value:
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+
+.data
+.global foo
+foo:
+ .asciz "Hello world!\n"
+
+.text
+.global bar
+.global _main
+
+_main:
+ mov $0, %rax
+ ret
+
+bar:
+ mov $2, %rax
+ ret
More information about the llvm-commits
mailing list