[lld] 70fbbcd - Revert "[lld-macho] Support .subsections_via_symbols"
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Tue May 19 08:30:14 PDT 2020
Author: Jez Ng
Date: 2020-05-19T08:30:02-07:00
New Revision: 70fbbcdd3437e9890307ef23d7057c565f142c44
URL: https://github.com/llvm/llvm-project/commit/70fbbcdd3437e9890307ef23d7057c565f142c44
DIFF: https://github.com/llvm/llvm-project/commit/70fbbcdd3437e9890307ef23d7057c565f142c44.diff
LOG: Revert "[lld-macho] Support .subsections_via_symbols"
Reverted due to the build breakage mentioned in https://reviews.llvm.org/D79926.
This reverts commit e270b2f1727c0fbde2676e8d0340c0d934726d3c.
Added:
Modified:
lld/MachO/Driver.cpp
lld/MachO/InputFiles.cpp
lld/MachO/InputFiles.h
lld/MachO/InputSection.cpp
lld/MachO/InputSection.h
Removed:
lld/test/MachO/subsections-section-relocs.s
lld/test/MachO/subsections-symbol-relocs.s
################################################################################
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index c653e8612957..ed1d03f80b00 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -326,14 +326,9 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
createSyntheticSections();
// Initialize InputSections.
- for (InputFile *file : inputFiles) {
- for (SubsectionMap &map : file->subsections) {
- for (auto &p : map) {
- InputSection *isec = p.second;
- inputSections.push_back(isec);
- }
- }
- }
+ for (InputFile *file : inputFiles)
+ for (InputSection *sec : file->sections)
+ inputSections.push_back(sec);
// Write to an output file.
writeResult();
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index cfd3df449a9d..c1107431af85 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -127,13 +127,17 @@ static const load_command *findCommand(const mach_header_64 *hdr,
return nullptr;
}
-void InputFile::parseSections(ArrayRef<section_64> sections) {
- subsections.reserve(sections.size());
+std::vector<InputSection *>
+InputFile::parseSections(ArrayRef<section_64> sections) {
+ std::vector<InputSection *> ret;
+ ret.reserve(sections.size());
+
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
for (const section_64 &sec : sections) {
InputSection *isec = make<InputSection>();
isec->file = this;
+ isec->header = &sec;
isec->name = StringRef(sec.sectname, strnlen(sec.sectname, 16));
isec->segname = StringRef(sec.segname, strnlen(sec.segname, 16));
isec->data = {buf + sec.offset, static_cast<size_t>(sec.size)};
@@ -143,185 +147,96 @@ void InputFile::parseSections(ArrayRef<section_64> sections) {
else
isec->align = 1 << sec.align;
isec->flags = sec.flags;
- subsections.push_back({{0, isec}});
+ ret.push_back(isec);
}
-}
-// Find the subsection corresponding to the greatest section offset that is <=
-// that of the given offset.
-//
-// offset: an offset relative to the start of the original InputSection (before
-// any subsection splitting has occurred). It will be updated to represent the
-// same location as an offset relative to the start of the containing
-// subsection.
-static InputSection *findContainingSubsection(SubsectionMap &map,
- uint32_t *offset) {
- auto it = std::prev(map.upper_bound(*offset));
- *offset -= it->first;
- return it->second;
+ return ret;
}
void InputFile::parseRelocations(const section_64 &sec,
- SubsectionMap &subsecMap) {
+ std::vector<Reloc> &relocs) {
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
ArrayRef<any_relocation_info> relInfos(
reinterpret_cast<const any_relocation_info *>(buf + sec.reloff),
sec.nreloc);
for (const any_relocation_info &anyRel : relInfos) {
- if (anyRel.r_word0 & R_SCATTERED)
- fatal("TODO: Scattered relocations not supported");
-
- auto rel = reinterpret_cast<const relocation_info &>(anyRel);
- if (!rel.r_pcrel)
- fatal("TODO: Only pcrel relocations are supported");
-
Reloc r;
- r.type = rel.r_type;
- uint32_t secRelOffset = rel.r_address;
- uint64_t rawAddend =
- target->getImplicitAddend(buf + sec.offset + secRelOffset, r.type);
-
- if (rel.r_extern) {
- r.target = symbols[rel.r_symbolnum];
- r.addend = rawAddend;
+ if (anyRel.r_word0 & R_SCATTERED) {
+ error("TODO: Scattered relocations not supported");
} else {
- if (rel.r_symbolnum == 0 || rel.r_symbolnum > subsections.size())
- fatal("invalid section index in relocation for offset " +
- std::to_string(r.offset) + " in section " + sec.sectname +
- " of " + getName());
-
- SubsectionMap &targetSubsecMap = subsections[rel.r_symbolnum - 1];
- const section_64 &targetSec = sectionHeaders[rel.r_symbolnum - 1];
- // The implicit addend for pcrel section relocations is the pcrel offset
- // in terms of the addresses in the input file. Here we adjust it so that
- // it describes the offset from the start of the target section.
- // TODO: Figure out what to do for non-pcrel section relocations.
- // TODO: The offset of 4 is probably not right for ARM64, nor for
- // relocations with r_length != 2.
- uint32_t targetOffset =
- sec.addr + secRelOffset + 4 + rawAddend - targetSec.addr;
- r.target = findContainingSubsection(targetSubsecMap, &targetOffset);
- r.addend = targetOffset;
+ auto rel = reinterpret_cast<const relocation_info &>(anyRel);
+ r.type = rel.r_type;
+ r.offset = rel.r_address;
+ r.addend = target->getImplicitAddend(buf + sec.offset + r.offset, r.type);
+ if (rel.r_extern) {
+ r.target = symbols[rel.r_symbolnum];
+ } else {
+ if (rel.r_symbolnum == 0 || rel.r_symbolnum > sections.size())
+ fatal("invalid section index in relocation for offset " +
+ std::to_string(r.offset) + " in section " + sec.sectname +
+ " of " + getName());
+ r.target = sections[rel.r_symbolnum - 1];
+ }
}
-
- InputSection *subsec = findContainingSubsection(subsecMap, &secRelOffset);
- r.offset = secRelOffset;
- subsec->relocs.push_back(r);
- }
-}
-
-void InputFile::parseSymbols(ArrayRef<const nlist_64> nList, const char *strtab,
- bool subsectionsViaSymbols) {
- // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols
- // out-of-sequence.
- symbols.resize(nList.size());
- std::vector<size_t> altEntrySymIdxs;
-
- auto createDefined = [&](const nlist_64 &sym, InputSection *isec,
- uint32_t value) -> Symbol * {
- StringRef name = strtab + sym.n_strx;
- if (sym.n_type & N_EXT)
- // Global defined symbol
- return symtab->addDefined(name, isec, value);
- else
- // Local defined symbol
- return make<Defined>(name, isec, value);
- };
-
- for (size_t i = 0, n = nList.size(); i < n; ++i) {
- const nlist_64 &sym = nList[i];
-
- // Undefined symbol
- if (!sym.n_sect) {
- StringRef name = strtab + sym.n_strx;
- symbols[i] = symtab->addUndefined(name);
- continue;
- }
-
- const section_64 &sec = sectionHeaders[sym.n_sect - 1];
- SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
- uint64_t offset = sym.n_value - sec.addr;
-
- // If the input file does not use subsections-via-symbols, all symbols can
- // use the same subsection. Otherwise, we must split the sections along
- // symbol boundaries.
- if (!subsectionsViaSymbols) {
- symbols[i] = createDefined(sym, subsecMap[0], offset);
- continue;
- }
-
- // nList entries aren't necessarily arranged in address order. Therefore,
- // we can't create alt-entry symbols at this point because a later symbol
- // may split its section, which may affect which subsection the alt-entry
- // symbol is assigned to. So we need to handle them in a second pass below.
- if (sym.n_desc & N_ALT_ENTRY) {
- altEntrySymIdxs.push_back(i);
- continue;
- }
-
- // Find the subsection corresponding to the greatest section offset that is
- // <= that of the current symbol. The subsection that we find either needs
- // to be used directly or split in two.
- uint32_t firstSize = offset;
- InputSection *firstIsec = findContainingSubsection(subsecMap, &firstSize);
-
- if (firstSize == 0) {
- // Alias of an existing symbol, or the first symbol in the section. These
- // are handled by reusing the existing section.
- symbols[i] = createDefined(sym, firstIsec, 0);
- continue;
- }
-
- // We saw a symbol definition at a new offset. Split the section into two
- // subsections. The new symbol uses the second subsection.
- auto *secondIsec = make<InputSection>(*firstIsec);
- secondIsec->data = firstIsec->data.slice(firstSize);
- firstIsec->data = firstIsec->data.slice(0, firstSize);
- // TODO: ld64 appears to preserve the original alignment as well as each
- // subsection's offset from the last aligned address. We should consider
- // emulating that behavior.
- secondIsec->align = MinAlign(firstIsec->align, offset);
-
- subsecMap[offset] = secondIsec;
- // By construction, the symbol will be at offset zero in the new section.
- symbols[i] = createDefined(sym, secondIsec, 0);
- }
-
- for (size_t idx : altEntrySymIdxs) {
- const nlist_64 &sym = nList[idx];
- SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
- uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr;
- InputSection *subsec = findContainingSubsection(subsecMap, &off);
- symbols[idx] = createDefined(sym, subsec, off);
+ relocs.push_back(r);
}
}
ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) {
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
+ ArrayRef<section_64> objSections;
if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) {
auto *c = reinterpret_cast<const segment_command_64 *>(cmd);
- sectionHeaders = ArrayRef<section_64>{
+ objSections = ArrayRef<section_64>{
reinterpret_cast<const section_64 *>(c + 1), c->nsects};
- parseSections(sectionHeaders);
+ sections = parseSections(objSections);
}
// TODO: Error on missing LC_SYMTAB?
if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
auto *c = reinterpret_cast<const symtab_command *>(cmd);
+ const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
ArrayRef<const nlist_64> nList(
reinterpret_cast<const nlist_64 *>(buf + c->symoff), c->nsyms);
- const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
- bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
- parseSymbols(nList, strtab, subsectionsViaSymbols);
+
+ symbols.reserve(c->nsyms);
+
+ for (const nlist_64 &sym : nList) {
+ StringRef name = strtab + sym.n_strx;
+
+ // Undefined symbol
+ if (!sym.n_sect) {
+ symbols.push_back(symtab->addUndefined(name));
+ continue;
+ }
+
+ InputSection *isec = sections[sym.n_sect - 1];
+ const section_64 &objSec = objSections[sym.n_sect - 1];
+ uint64_t value = sym.n_value - objSec.addr;
+
+ // Global defined symbol
+ if (sym.n_type & N_EXT) {
+ symbols.push_back(symtab->addDefined(name, isec, value));
+ continue;
+ }
+
+ // Local defined symbol
+ symbols.push_back(make<Defined>(name, isec, value));
+ }
}
// The relocations may refer to the symbols, so we parse them after we have
- // parsed all the symbols.
- for (size_t i = 0, n = subsections.size(); i < n; ++i)
- parseRelocations(sectionHeaders[i], subsections[i]);
+ // the symbols loaded.
+ if (!sections.empty()) {
+ auto it = sections.begin();
+ for (const section_64 &sec : objSections) {
+ parseRelocations(sec, (*it)->relocs);
+ ++it;
+ }
+ }
}
DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
@@ -409,8 +324,7 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
sym.getName());
auto file = make<ObjFile>(mb);
symbols.insert(symbols.end(), file->symbols.begin(), file->symbols.end());
- subsections.insert(subsections.end(), file->subsections.begin(),
- file->subsections.end());
+ sections.insert(sections.end(), file->sections.begin(), file->sections.end());
}
// Returns "<internal>" or "baz.o".
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 5d2bfb03e904..c94035b1bcf6 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -14,8 +14,6 @@
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/MemoryBuffer.h"
-
-#include <map>
#include <vector>
namespace lld {
@@ -25,11 +23,6 @@ class InputSection;
class Symbol;
struct Reloc;
-// If .subsections_via_symbols is set, each InputSection will be split along
-// symbol boundaries. The keys of a SubsectionMap represent the offsets of
-// each subsection from the start of the original pre-split InputSection.
-using SubsectionMap = std::map<uint32_t, InputSection *>;
-
class InputFile {
public:
enum Kind {
@@ -44,18 +37,15 @@ class InputFile {
MemoryBufferRef mb;
std::vector<Symbol *> symbols;
- ArrayRef<llvm::MachO::section_64> sectionHeaders;
- std::vector<SubsectionMap> subsections;
+ std::vector<InputSection *> sections;
protected:
InputFile(Kind kind, MemoryBufferRef mb) : mb(mb), fileKind(kind) {}
- void parseSections(ArrayRef<llvm::MachO::section_64>);
-
- void parseSymbols(ArrayRef<const llvm::MachO::nlist_64> nList,
- const char *strtab, bool subsectionsViaSymbols);
+ std::vector<InputSection *> parseSections(ArrayRef<llvm::MachO::section_64>);
- void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &);
+ void parseRelocations(const llvm::MachO::section_64 &,
+ std::vector<Reloc> &relocs);
private:
const Kind fileKind;
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 5453c0b8cd78..ace30c8c41dc 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -32,6 +32,7 @@ void InputSection::writeTo(uint8_t *buf) {
for (Reloc &r : relocs) {
uint64_t va = 0;
+ uint64_t addend = r.addend;
if (auto *s = r.target.dyn_cast<Symbol *>()) {
if (auto *dylibSymbol = dyn_cast<DylibSymbol>(s)) {
va = target->getDylibSymbolVA(*dylibSymbol, r.type);
@@ -40,9 +41,15 @@ void InputSection::writeTo(uint8_t *buf) {
}
} else if (auto *isec = r.target.dyn_cast<InputSection *>()) {
va = isec->getVA();
+ // The implicit addend for pcrel section relocations is the pcrel offset
+ // in terms of the addresses in the input file. Here we adjust it so that
+ // it describes the offset from the start of the target section.
+ // TODO: Figure out what to do for non-pcrel section relocations.
+ // TODO: The offset of 4 is probably not right for ARM64.
+ addend -= isec->header->addr - (header->addr + r.offset + 4);
}
- uint64_t val = va + r.addend;
+ uint64_t val = va + addend;
if (1) // TODO: handle non-pcrel relocations
val -= getVA() + r.offset;
target->relocateOne(buf + r.offset, r.type, val);
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 1d11b8e64c30..908f09e6d29d 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -24,11 +24,7 @@ class Symbol;
struct Reloc {
uint8_t type;
- // Adding this offset to the address of the target symbol or subsection gives
- // the destination that this relocation refers to.
uint32_t addend;
- // The offset from the start of the subsection that this relocation belongs
- // to.
uint32_t offset;
llvm::PointerUnion<Symbol *, InputSection *> target;
};
@@ -46,6 +42,8 @@ class InputSection {
InputFile *file = nullptr;
StringRef name;
StringRef segname;
+ // This provides access to the address of the section in the input file.
+ const llvm::MachO::section_64 *header;
OutputSection *parent = nullptr;
uint64_t outSecOff = 0;
diff --git a/lld/test/MachO/subsections-section-relocs.s b/lld/test/MachO/subsections-section-relocs.s
deleted file mode 100644
index e8a8d7a3ec40..000000000000
--- a/lld/test/MachO/subsections-section-relocs.s
+++ /dev/null
@@ -1,47 +0,0 @@
-# REQUIRES: x86
-# RUN: mkdir -p %t
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
-
-# RUN: echo "_bar_str" > %t/order-file
-# RUN: echo "_foo_str" >> %t/order-file
-
-# RUN: lld -flavor darwinnew -o %t/test %t/test.o -order_file %t/order-file
-# RUN: llvm-objdump --section-headers -d --no-show-raw-insn %t/test | FileCheck %s
-# CHECK-LABEL: Sections:
-# CHECK: __cstring {{[^ ]*}} {{0*}}[[#%x, CSTRING_ADDR:]]
-# CHECK-LABEL: Disassembly of section __TEXT,__text:
-## L._str should end up at CSTRING_ADDR + 4, and leaq is 7 bytes long so we
-## have RIP = ADDR + 7
-# CHECK: [[#%x, ADDR:]]: leaq
-# CHECK-SAME: [[#%u, CSTRING_ADDR + 4 - ADDR - 7]](%rip), %rsi {{.*}} <_bar_str+0x4>
-
-# RUN: llvm-readobj --string-dump=__cstring %t/test | FileCheck %s --check-prefix=STRINGS
-# STRINGS: bar
-# STRINGS: Private symbol
-# STRINGS: foo
-
-.text
-.globl _main, _foo_str, _bar_str
-
-_main:
- leaq L_.str(%rip), %rsi
- mov $0, %rax
- ret
-
-.section __TEXT,__cstring
-_foo_str:
- .asciz "foo"
-
-_bar_str:
- .asciz "bar"
-
-## References to this generate a section relocation
-## N.B.: ld64 doesn't actually reorder symbols in __cstring based on the order
-## file. Only our implementation does. However, I'm not sure how else to
-## test section relocations that target an address inside a relocated
-## symbol: using a non-__cstring section would cause llvm-mc to emit a
-## symbol relocation instead using the nearest symbol.
-L_.str:
- .asciz "Private symbol"
-
-.subsections_via_symbols
diff --git a/lld/test/MachO/subsections-symbol-relocs.s b/lld/test/MachO/subsections-symbol-relocs.s
deleted file mode 100644
index 475c909377da..000000000000
--- a/lld/test/MachO/subsections-symbol-relocs.s
+++ /dev/null
@@ -1,55 +0,0 @@
-# REQUIRES: x86
-# RUN: mkdir -p %t
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
-
-# RUN: echo "_bar" > %t/order-file-1
-# RUN: echo "_foo" >> %t/order-file-1
-# RUN: echo "_main" >> %t/order-file-1
-## _qux is marked as .alt_entry, so it should not create a new subsection and
-## its contents should move with _bar to the start of the output despite the
-## order file listing it at the end.
-# RUN: echo "_qux" >> %t/order-file-1
-
-## _bar and _baz point to the same address, so both order files should achieve
-## the same result.
-# RUN: echo "_baz" > %t/order-file-2
-# RUN: echo "_foo" >> %t/order-file-2
-# RUN: echo "_main" >> %t/order-file-2
-# RUN: echo "_qux" >> %t/order-file-2
-
-# RUN: lld -flavor darwinnew -o %t/test-1 %t/test.o -order_file %t/order-file-1
-# RUN: llvm-objdump -d --no-show-raw-insn %t/test-1 | FileCheck %s
-# RUN: lld -flavor darwinnew -o %t/test-2 %t/test.o -order_file %t/order-file-2
-# RUN: llvm-objdump -d --no-show-raw-insn %t/test-2 | FileCheck %s
-# CHECK-LABEL: Disassembly of section __TEXT,__text:
-# CHECK: <_bar>:
-# CHECK-NEXT: callq {{.*}} <_foo>
-# CHECK-EMPTY:
-# CHECK-NEXT: <_qux>:
-# CHECK-NEXT: retq
-# CHECK: <_foo>:
-# CHECK-NEXT: retq
-# CHECK: <_main>:
-# CHECK-NEXT: callq {{.*}} <_bar>
-# CHECK-NEXT: movq $0, %rax
-# CHECK-NEXT: retq
-
-.text
-.globl _main, _foo, _bar, _qux
-.alt_entry _qux
-
-_foo:
- retq
-
-_main:
- callq _bar
- movq $0, %rax
- retq
-
-_bar:
-_baz:
- callq _foo
-_qux:
- retq
-
-.subsections_via_symbols
More information about the llvm-commits mailing list