[lld] e183bf8 - [lld-macho][reland] Initial support for EH Frames
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 13 04:47:29 PDT 2022
Author: Jez Ng
Date: 2022-06-13T07:45:16-04:00
New Revision: e183bf8e1599d98bade8fe4a5774c23b9a67655b
URL: https://github.com/llvm/llvm-project/commit/e183bf8e1599d98bade8fe4a5774c23b9a67655b
DIFF: https://github.com/llvm/llvm-project/commit/e183bf8e1599d98bade8fe4a5774c23b9a67655b.diff
LOG: [lld-macho][reland] Initial support for EH Frames
This reverts commit 942f4e3a7cc9a9f8b2654817cff12907d1276031.
The additional change required to avoid the assertion errors seen
previously is:
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -443,7 +443,9 @@ void macho::foldIdenticalSections() {
/*relocVA=*/0);
isec->data = copy;
}
- } else {
+ } else if (!isEhFrameSection(isec)) {
+ // EH frames are gathered as hashables from unwindEntry above; give a
+ // unique ID to everything else.
isec->icfEqClass[0] = ++icfUniqueID;
}
}
Differential Revision: https://reviews.llvm.org/D123435
Added:
lld/MachO/EhFrame.cpp
lld/MachO/EhFrame.h
lld/test/MachO/Inputs/eh-frame-x86_64-r.o
lld/test/MachO/eh-frame.s
lld/test/MachO/invalid/eh-frame.s
Modified:
lld/MachO/Arch/ARM64.cpp
lld/MachO/Arch/ARM64_32.cpp
lld/MachO/Arch/X86_64.cpp
lld/MachO/CMakeLists.txt
lld/MachO/Config.h
lld/MachO/Driver.cpp
lld/MachO/ICF.cpp
lld/MachO/InputFiles.cpp
lld/MachO/InputFiles.h
lld/MachO/InputSection.cpp
lld/MachO/InputSection.h
lld/MachO/Relocations.h
lld/MachO/Symbols.h
lld/MachO/Target.h
lld/MachO/UnwindInfoSection.cpp
lld/MachO/Writer.cpp
lld/test/MachO/obj-file-with-stabs.s
lld/test/MachO/tools/generate-cfi-funcs.py
Removed:
################################################################################
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 932a081acf392..e5b8a1b722f66 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -13,6 +13,7 @@
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
+#include "mach-o/compact_unwind_encoding.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -141,6 +142,10 @@ ARM64::ARM64() : ARM64Common(LP64()) {
backwardBranchRange = 128 * 1024 * 1024;
forwardBranchRange = backwardBranchRange - 4;
+ modeDwarfEncoding = UNWIND_ARM64_MODE_DWARF;
+ subtractorRelocType = ARM64_RELOC_SUBTRACTOR;
+ unsignedRelocType = ARM64_RELOC_UNSIGNED;
+
stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
stubHelperEntrySize = sizeof(stubHelperEntryCode);
}
diff --git a/lld/MachO/Arch/ARM64_32.cpp b/lld/MachO/Arch/ARM64_32.cpp
index b1036912c39a8..5be411e403420 100644
--- a/lld/MachO/Arch/ARM64_32.cpp
+++ b/lld/MachO/Arch/ARM64_32.cpp
@@ -105,6 +105,10 @@ ARM64_32::ARM64_32() : ARM64Common(ILP32()) {
cpuType = CPU_TYPE_ARM64_32;
cpuSubtype = CPU_SUBTYPE_ARM64_V8;
+ modeDwarfEncoding = 0x04000000; // UNWIND_ARM_MODE_DWARF
+ subtractorRelocType = GENERIC_RELOC_INVALID; // FIXME
+ unsignedRelocType = GENERIC_RELOC_INVALID; // FIXME
+
stubSize = sizeof(stubCode);
stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
stubHelperEntrySize = sizeof(stubHelperEntryCode);
diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 8b998288bb524..d675356b9ffb9 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -12,6 +12,7 @@
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
+#include "mach-o/compact_unwind_encoding.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Endian.h"
@@ -185,6 +186,10 @@ X86_64::X86_64() : TargetInfo(LP64()) {
cpuType = CPU_TYPE_X86_64;
cpuSubtype = CPU_SUBTYPE_X86_64_ALL;
+ modeDwarfEncoding = UNWIND_X86_MODE_DWARF;
+ subtractorRelocType = X86_64_RELOC_SUBTRACTOR;
+ unsignedRelocType = X86_64_RELOC_UNSIGNED;
+
stubSize = sizeof(stub);
stubHelperHeaderSize = sizeof(stubHelperHeader);
stubHelperEntrySize = sizeof(stubHelperEntry);
diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 4bd0816bca66f..f8b1d45c489a3 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -14,6 +14,7 @@ add_lld_library(lldMachO
Driver.cpp
DriverUtils.cpp
Dwarf.cpp
+ EhFrame.cpp
ExportTrie.cpp
ICF.cpp
InputFiles.cpp
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index de64dc47591d1..69e42c596d16c 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -130,6 +130,9 @@ struct Configuration {
bool dedupLiterals = true;
bool omitDebugInfo = false;
bool warnDylibInstallName = false;
+ // Temporary config flag that will be removed once we have fully implemented
+ // support for __eh_frame.
+ bool parseEhFrames = false;
uint32_t headerPad;
uint32_t dylibCompatibilityVersion = 0;
uint32_t dylibCurrentVersion = 0;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 9e3ea8eedcfcc..0a86b09f911bb 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1039,8 +1039,9 @@ static void gatherInputSections() {
int inputOrder = 0;
for (const InputFile *file : inputFiles) {
for (const Section *section : file->sections) {
+ // Compact unwind entries require special handling elsewhere. (In
+ // contrast, EH frames are handled like regular ConcatInputSections.)
if (section->name == section_names::compactUnwind)
- // Compact unwind entries require special handling elsewhere.
continue;
ConcatOutputSection *osec = nullptr;
for (const Subsection &subsection : section->subsections) {
@@ -1302,6 +1303,7 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->callGraphProfileSort = args.hasFlag(
OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order);
+ config->parseEhFrames = static_cast<bool>(getenv("LLD_IN_TEST"));
// FIXME: Add a commandline flag for this too.
config->zeroModTime = getenv("ZERO_AR_DATE");
diff --git a/lld/MachO/EhFrame.cpp b/lld/MachO/EhFrame.cpp
new file mode 100644
index 0000000000000..50d8accc0596e
--- /dev/null
+++ b/lld/MachO/EhFrame.cpp
@@ -0,0 +1,140 @@
+//===- EhFrame.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "EhFrame.h"
+#include "InputFiles.h"
+
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::macho;
+using namespace llvm::support::endian;
+
+uint64_t EhReader::readLength(size_t *off) const {
+ const size_t errOff = *off;
+ if (*off + 4 > data.size())
+ failOn(errOff, "CIE/FDE too small");
+ uint64_t len = read32le(data.data() + *off);
+ *off += 4;
+ if (len == dwarf::DW_LENGTH_DWARF64) {
+ // FIXME: test this DWARF64 code path
+ if (*off + 8 > data.size())
+ failOn(errOff, "CIE/FDE too small");
+ len = read64le(data.data() + *off);
+ *off += 8;
+ }
+ if (*off + len > data.size())
+ failOn(errOff, "CIE/FDE extends past the end of the section");
+ return len;
+}
+
+void EhReader::skipValidLength(size_t *off) const {
+ uint32_t len = read32le(data.data() + *off);
+ *off += 4;
+ if (len == dwarf::DW_LENGTH_DWARF64)
+ *off += 8;
+}
+
+// Read a byte and advance off by one byte.
+uint8_t EhReader::readByte(size_t *off) const {
+ if (*off + 1 > data.size())
+ failOn(*off, "unexpected end of CIE/FDE");
+ return data[(*off)++];
+}
+
+uint32_t EhReader::readU32(size_t *off) const {
+ if (*off + 4 > data.size())
+ failOn(*off, "unexpected end of CIE/FDE");
+ uint32_t v = read32le(data.data() + *off);
+ *off += 4;
+ return v;
+}
+
+uint64_t EhReader::readPointer(size_t *off) const {
+ if (*off + wordSize > data.size())
+ failOn(*off, "unexpected end of CIE/FDE");
+ uint64_t v;
+ if (wordSize == 8)
+ v = read64le(data.data() + *off);
+ else {
+ assert(wordSize == 4);
+ v = read32le(data.data() + *off);
+ }
+ *off += wordSize;
+ return v;
+}
+
+// Read a null-terminated string.
+StringRef EhReader::readString(size_t *off) const {
+ if (*off > data.size())
+ failOn(*off, "corrupted CIE (failed to read string)");
+ const size_t maxlen = data.size() - *off;
+ auto *c = reinterpret_cast<const char *>(data.data() + *off);
+ size_t len = strnlen(c, maxlen);
+ if (len == maxlen) // we failed to find the null terminator
+ failOn(*off, "corrupted CIE (failed to read string)");
+ *off += len + 1; // skip the null byte too
+ return StringRef(c, len);
+}
+
+void EhReader::skipLeb128(size_t *off) const {
+ const size_t errOff = *off;
+ while (*off < data.size()) {
+ uint8_t val = data[(*off)++];
+ if ((val & 0x80) == 0)
+ return;
+ }
+ failOn(errOff, "corrupted CIE (failed to read LEB128)");
+}
+
+void EhReader::failOn(size_t errOff, const Twine &msg) const {
+ fatal(toString(file) + ":(__eh_frame+0x" +
+ Twine::utohexstr(dataOff + errOff) + "): " + msg);
+}
+
+/*
+ * Create a pair of relocs to write the value of:
+ * `b - (offset + a)` if Invert == false
+ * `(a + offset) - b` if Invert == true
+ */
+template <bool Invert = false>
+static void createSubtraction(PointerUnion<Symbol *, InputSection *> a,
+ PointerUnion<Symbol *, InputSection *> b,
+ uint64_t off, uint8_t length,
+ SmallVectorImpl<Reloc> *newRelocs) {
+ auto subtrahend = a;
+ auto minuend = b;
+ if (Invert)
+ std::swap(subtrahend, minuend);
+ assert(subtrahend.is<Symbol *>());
+ Reloc subtrahendReloc(target->subtractorRelocType, /*pcrel=*/false, length,
+ off, /*addend=*/0, subtrahend);
+ Reloc minuendReloc(target->unsignedRelocType, /*pcrel=*/false, length, off,
+ (Invert ? 1 : -1) * off, minuend);
+ newRelocs->push_back(subtrahendReloc);
+ newRelocs->push_back(minuendReloc);
+}
+
+void EhRelocator::makePcRel(uint64_t off,
+ PointerUnion<Symbol *, InputSection *> target,
+ uint8_t length) {
+ createSubtraction(isec->symbols[0], target, off, length, &newRelocs);
+}
+
+void EhRelocator::makeNegativePcRel(
+ uint64_t off, PointerUnion<Symbol *, InputSection *> target,
+ uint8_t length) {
+ createSubtraction</*Invert=*/true>(isec, target, off, length, &newRelocs);
+}
+
+void EhRelocator::commit() {
+ isec->relocs.insert(isec->relocs.end(), newRelocs.begin(), newRelocs.end());
+}
diff --git a/lld/MachO/EhFrame.h b/lld/MachO/EhFrame.h
new file mode 100644
index 0000000000000..c8269b941bcfc
--- /dev/null
+++ b/lld/MachO/EhFrame.h
@@ -0,0 +1,120 @@
+//===- EhFrame.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_EH_FRAME_H
+#define LLD_MACHO_EH_FRAME_H
+
+#include "InputSection.h"
+#include "Relocations.h"
+
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
+
+/*
+ * NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it
+ * is closely coupled with other file parsing logic; EhFrame.h just contains a
+ * few helpers.
+ */
+
+/*
+ * === The EH frame format ===
+ *
+ * EH frames can either be Common Information Entries (CIEs) or Frame
+ * Description Entries (FDEs). CIEs contain information that is common amongst
+ * several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame
+ * entries together form a forest of two-level trees, with CIEs as the roots
+ * and FDEs as the leaves. Note that a CIE must precede the FDEs which point
+ * to it.
+ *
+ * A CIE comprises the following fields in order:
+ * 1. Length of the entry (4 or 12 bytes)
+ * 2. CIE offset (4 bytes; always 0 for CIEs)
+ * 3. CIE version (byte)
+ * 4. Null-terminated augmentation string
+ * 5-8. LEB128 values that we don't care about
+ * 9. Augmentation data, to be interpreted using the aug string
+ * 10. DWARF instructions (ignored by LLD)
+ *
+ * An FDE comprises the following:
+ * 1. Length of the entry (4 or 12 bytes)
+ * 2. CIE offset (4 bytes pcrel offset that points backwards to this FDE's CIE)
+ * 3. Function address (pointer-sized pcrel offset)
+ * 4. (Optional) Augmentation data length
+ * 5. (Optional) LSDA address (pointer-sized pcrel offset)
+ * 6. DWARF instructions (ignored by LLD)
+ */
+namespace lld {
+namespace macho {
+
+class EhReader {
+public:
+ EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff,
+ size_t wordSize)
+ : file(file), data(data), dataOff(dataOff), wordSize(wordSize) {}
+ size_t size() const { return data.size(); }
+ // Read and validate the length field.
+ uint64_t readLength(size_t *off) const;
+ // Skip the length field without doing validation.
+ void skipValidLength(size_t *off) const;
+ uint8_t readByte(size_t *off) const;
+ uint32_t readU32(size_t *off) const;
+ uint64_t readPointer(size_t *off) const;
+ StringRef readString(size_t *off) const;
+ void skipLeb128(size_t *off) const;
+ void failOn(size_t errOff, const Twine &msg) const;
+
+private:
+ const ObjFile *file;
+ ArrayRef<uint8_t> data;
+ // The offset of the data array within its section. Used only for error
+ // reporting.
+ const size_t dataOff;
+ size_t wordSize;
+};
+
+// The EH frame format, when emitted by llvm-mc, consists of a number of
+// "abs-ified" relocations, i.e. relocations that are implicitly encoded as
+// pcrel offsets in the section data. The offsets refer to the locations of
+// symbols in the input object file. When we ingest these EH frames, we convert
+// these implicit relocations into explicit Relocs.
+//
+// These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4.
+// However, we need this operation to be cross-platform, and ARM does not have a
+// similar relocation that is applicable. We therefore use the more verbose (but
+// more generic) subtractor relocation to encode these pcrel values. ld64
+// appears to do something similar -- its `-r` output contains these explicit
+// subtractor relocations.
+class EhRelocator {
+public:
+ EhRelocator(InputSection *isec) : isec(isec) {}
+
+ // For the next two methods, let `PC` denote `isec address + off`.
+ // Create relocs writing the value of target - PC to PC.
+ void makePcRel(uint64_t off,
+ llvm::PointerUnion<Symbol *, InputSection *> target,
+ uint8_t length);
+ // Create relocs writing the value of PC - target to PC.
+ void makeNegativePcRel(uint64_t off,
+ llvm::PointerUnion<Symbol *, InputSection *> target,
+ uint8_t length);
+ // Insert the new relocations into isec->relocs.
+ void commit();
+
+private:
+ InputSection *isec;
+ // Insert new relocs here so that we don't invalidate iterators into the
+ // existing relocs vector.
+ SmallVector<Reloc, 6> newRelocs;
+};
+
+} // namespace macho
+} // namespace lld
+
+#endif
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index e359443430128..6d10262826e54 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -212,9 +212,9 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
// info matches. For simplicity, we only handle the case where there are only
// symbols at offset zero within the section (which is typically the case with
// .subsections_via_symbols.)
- auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; };
- auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU);
- auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU);
+ auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; };
+ auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasUnwind);
+ auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasUnwind);
if (itA == ia->symbols.end())
return itB == ib->symbols.end();
if (itB == ib->symbols.end())
@@ -443,7 +443,9 @@ void macho::foldIdenticalSections() {
/*relocVA=*/0);
isec->data = copy;
}
- } else {
+ } else if (!isEhFrameSection(isec)) {
+ // EH frames are gathered as hashables from unwindEntry above; give a
+ // unique ID to everything else.
isec->icfEqClass[0] = ++icfUniqueID;
}
}
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 05f491e1b7bae..16d019fb477bd 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -45,6 +45,7 @@
#include "Config.h"
#include "Driver.h"
#include "Dwarf.h"
+#include "EhFrame.h"
#include "ExportTrie.h"
#include "InputSection.h"
#include "MachOStructs.h"
@@ -323,6 +324,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
section, data.slice(off, recordSize), align);
subsections.push_back({off, isec});
}
+ section.doneSplitting = true;
};
if (sectionType(sec.flags) == S_CSTRING_LITERALS ||
@@ -344,6 +346,9 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
section.subsections.push_back({0, isec});
} else if (auto recordSize = getRecordSize(segname, name)) {
splitRecords(*recordSize);
+ } else if (config->parseEhFrames && name == section_names::ehFrame &&
+ segname == segment_names::text) {
+ splitEhFrames(data, *sections.back());
} else if (segname == segment_names::llvm) {
if (config->callGraphProfileSort && name == section_names::cgProfile)
checkError(parseCallGraph(data, callGraph));
@@ -371,6 +376,45 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
}
}
+void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
+ EhReader reader(this, data, /*dataOff=*/0, target->wordSize);
+ size_t off = 0;
+ while (off < reader.size()) {
+ uint64_t frameOff = off;
+ uint64_t length = reader.readLength(&off);
+ if (length == 0)
+ break;
+ uint64_t fullLength = length + (off - frameOff);
+ off += length;
+ // We hard-code an alignment of 1 here because we don't actually want our
+ // EH frames to be aligned to the section alignment. EH frame decoders don't
+ // expect this alignment. Moreover, each EH frame must start where the
+ // previous one ends, and where it ends is indicated by the length field.
+ // Unless we update the length field (troublesome), we should keep the
+ // alignment to 1.
+ // Note that we still want to preserve the alignment of the overall section,
+ // just not of the individual EH frames.
+ ehFrameSection.subsections.push_back(
+ {frameOff, make<ConcatInputSection>(ehFrameSection,
+ data.slice(frameOff, fullLength),
+ /*align=*/1)});
+ }
+ ehFrameSection.doneSplitting = true;
+}
+
+template <class T>
+static Section *findContainingSection(const std::vector<Section *> &sections,
+ T *offset) {
+ static_assert(std::is_same<uint64_t, T>::value ||
+ std::is_same<uint32_t, T>::value,
+ "unexpected type for offset");
+ auto it = std::prev(llvm::upper_bound(
+ sections, *offset,
+ [](uint64_t value, const Section *sec) { return value < sec->addr; }));
+ *offset -= (*it)->addr;
+ return *it;
+}
+
// Find the subsection corresponding to the greatest section offset that is <=
// that of the given offset.
//
@@ -475,13 +519,6 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
relocation_info relInfo = relInfos[i];
bool isSubtrahend =
target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
- if (isSubtrahend && StringRef(sec.sectname) == section_names::ehFrame) {
- // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
- // adds local "EH_Frame1" and "func.eh". Ignore them because they have
- // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
- ++i;
- continue;
- }
int64_t pairedAddend = 0;
if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
@@ -637,7 +674,8 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
}
assert(!isWeakDefCanBeHidden &&
"weak_def_can_be_hidden on already-hidden symbol?");
- bool includeInSymtab = !name.startswith("l") && !name.startswith("L");
+ bool includeInSymtab =
+ !name.startswith("l") && !name.startswith("L") && !isEhFrameSection(isec);
return make<Defined>(
name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
/*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab,
@@ -730,20 +768,14 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
Subsections &subsections = sections[i]->subsections;
if (subsections.empty())
continue;
- if (sections[i]->name == section_names::ehFrame) {
- // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
- // adds local "EH_Frame1" and "func.eh". Ignore them because they have
- // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
- continue;
- }
std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
uint64_t sectionAddr = sectionHeaders[i].addr;
uint32_t sectionAlign = 1u << sectionHeaders[i].align;
- // Record-based sections have already been split into subsections during
+ // Some sections have already been split into subsections during
// parseSections(), so we simply need to match Symbols to the corresponding
// subsection here.
- if (getRecordSize(sections[i]->segname, sections[i]->name)) {
+ if (sections[i]->doneSplitting) {
for (size_t j = 0; j < symbolIndices.size(); ++j) {
uint32_t symIndex = symbolIndices[j];
const NList &sym = nList[symIndex];
@@ -760,6 +792,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
}
continue;
}
+ sections[i]->doneSplitting = true;
// Calculate symbol sizes and create subsections by splitting the sections
// along symbol boundaries.
@@ -930,6 +963,8 @@ template <class LP> void ObjFile::parse() {
}
if (compactUnwindSection)
registerCompactUnwind(*compactUnwindSection);
+ if (config->parseEhFrames && ehFrameSection)
+ registerEhFrames(*ehFrameSection);
}
template <class LP> void ObjFile::parseLazy() {
@@ -1003,6 +1038,12 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
// of the corresponding relocations.) We rely on `relocateCompactUnwind()`
// to correctly handle these truncated input sections.
isec->data = isec->data.slice(target->wordSize);
+ uint32_t encoding = read32le(isec->data.data() + sizeof(uint32_t));
+ // llvm-mc omits CU entries for functions that need DWARF encoding, but
+ // `ld -r` doesn't. We can ignore them because we will re-synthesize these
+ // CU entries from the DWARF info during the output phase.
+ if ((encoding & target->modeDwarfEncoding) == target->modeDwarfEncoding)
+ continue;
ConcatInputSection *referentIsec;
for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
@@ -1053,6 +1094,252 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
}
}
+struct CIE {
+ macho::Symbol *personalitySymbol = nullptr;
+ bool fdesHaveLsda = false;
+ bool fdesHaveAug = false;
+};
+
+static CIE parseCIE(const InputSection *isec, const EhReader &reader,
+ size_t off) {
+ // Handling the full generality of possible DWARF encodings would be a major
+ // pain. We instead take advantage of our knowledge of how llvm-mc encodes
+ // DWARF and handle just that.
+ constexpr uint8_t expectedPersonalityEnc =
+ dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_sdata4;
+ constexpr uint8_t expectedPointerEnc =
+ dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_absptr;
+
+ CIE cie;
+ uint8_t version = reader.readByte(&off);
+ if (version != 1 && version != 3)
+ fatal("Expected CIE version of 1 or 3, got " + Twine(version));
+ StringRef aug = reader.readString(&off);
+ reader.skipLeb128(&off); // skip code alignment
+ reader.skipLeb128(&off); // skip data alignment
+ reader.skipLeb128(&off); // skip return address register
+ reader.skipLeb128(&off); // skip aug data length
+ uint64_t personalityAddrOff = 0;
+ for (char c : aug) {
+ switch (c) {
+ case 'z':
+ cie.fdesHaveAug = true;
+ break;
+ case 'P': {
+ uint8_t personalityEnc = reader.readByte(&off);
+ if (personalityEnc != expectedPersonalityEnc)
+ reader.failOn(off, "unexpected personality encoding 0x" +
+ Twine::utohexstr(personalityEnc));
+ personalityAddrOff = off;
+ off += 4;
+ break;
+ }
+ case 'L': {
+ cie.fdesHaveLsda = true;
+ uint8_t lsdaEnc = reader.readByte(&off);
+ if (lsdaEnc != expectedPointerEnc)
+ reader.failOn(off, "unexpected LSDA encoding 0x" +
+ Twine::utohexstr(lsdaEnc));
+ break;
+ }
+ case 'R': {
+ uint8_t pointerEnc = reader.readByte(&off);
+ if (pointerEnc != expectedPointerEnc)
+ reader.failOn(off, "unexpected pointer encoding 0x" +
+ Twine::utohexstr(pointerEnc));
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ if (personalityAddrOff != 0) {
+ auto personalityRelocIt =
+ llvm::find_if(isec->relocs, [=](const macho::Reloc &r) {
+ return r.offset == personalityAddrOff;
+ });
+ if (personalityRelocIt == isec->relocs.end())
+ reader.failOn(off, "Failed to locate relocation for personality symbol");
+ cie.personalitySymbol = personalityRelocIt->referent.get<macho::Symbol *>();
+ }
+ return cie;
+}
+
+// EH frame target addresses may be encoded as pcrel offsets. However, rather
+// than using an actual pcrel reloc, ld64 emits subtractor relocations instead.
+// This function recovers the target address from the subtractors, essentially
+// performing the inverse operation of EhRelocator.
+//
+// Concretely, we expect our relocations to write the value of `target_addr -
+// PC` to `PC`. `PC` itself is denoted by the subtrahend relocation's symbol,
+// offset by the (negated) addend of the minuend relocation.
+//
+// If `Invert` is set, then we instead expect `PC - target_addr` to be written
+// to `PC`, with `PC` denoted by the minuend's symbol plus its addend.
+template <bool Invert = false>
+Defined *
+getTargetSymbolFromSubtraction(const InputSection *isec,
+ std::vector<macho::Reloc>::iterator relocIt) {
+ const macho::Reloc &subtrahend = *relocIt;
+ const macho::Reloc &minuend = *std::next(relocIt);
+ assert(target->hasAttr(subtrahend.type, RelocAttrBits::SUBTRAHEND));
+ assert(target->hasAttr(minuend.type, RelocAttrBits::UNSIGNED));
+ // Note: pcSym may *not* be exactly at the PC; there's usually a non-zero
+ // addend.
+ auto *pcSym = cast<Defined>(subtrahend.referent.get<macho::Symbol *>());
+ Defined *target =
+ cast_or_null<Defined>(minuend.referent.dyn_cast<macho::Symbol *>());
+ if (!target) {
+ auto *targetIsec =
+ cast<ConcatInputSection>(minuend.referent.get<InputSection *>());
+ target = findSymbolAtOffset(targetIsec, minuend.addend);
+ }
+ if (Invert)
+ std::swap(pcSym, target);
+ if (pcSym->isec != isec ||
+ pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset)
+ fatal("invalid FDE relocation in __eh_frame");
+ return target;
+}
+
+Defined *findSymbolAtAddress(const std::vector<Section *> &sections,
+ uint64_t addr) {
+ Section *sec = findContainingSection(sections, &addr);
+ auto *isec = cast<ConcatInputSection>(findContainingSubsection(*sec, &addr));
+ return findSymbolAtOffset(isec, addr);
+}
+
+// For symbols that don't have compact unwind info, associate them with the more
+// general-purpose (and verbose) DWARF unwind info found in __eh_frame.
+//
+// This requires us to parse the contents of __eh_frame. See EhFrame.h for a
+// description of its format.
+//
+// While parsing, we also look for what MC calls "abs-ified" relocations -- they
+// are relocations which are implicitly encoded as offsets in the section data.
+// We convert them into explicit Reloc structs so that the EH frames can be
+// handled just like a regular ConcatInputSection later in our output phase.
+//
+// We also need to handle the case where our input object file has explicit
+// relocations. This is the case when e.g. it's the output of `ld -r`. We only
+// look for the "abs-ified" relocation if an explicit relocation is absent.
+void ObjFile::registerEhFrames(Section &ehFrameSection) {
+ DenseMap<const InputSection *, CIE> cieMap;
+ for (const Subsection &subsec : ehFrameSection.subsections) {
+ auto *isec = cast<ConcatInputSection>(subsec.isec);
+ uint64_t isecOff = subsec.offset;
+
+ // Subtractor relocs require the subtrahend to be a symbol reloc. Ensure
+ // that all EH frames have an associated symbol so that we can generate
+ // subtractor relocs that reference them.
+ if (isec->symbols.size() == 0)
+ isec->symbols.push_back(make<Defined>(
+ "EH_Frame", isec->getFile(), isec, /*value=*/0, /*size=*/0,
+ /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
+ /*includeInSymtab=*/false, /*isThumb=*/false,
+ /*isReferencedDynamically=*/false, /*noDeadStrip=*/false));
+ else if (isec->symbols[0]->value != 0)
+ fatal("found symbol at unexpected offset in __eh_frame");
+
+ EhReader reader(this, isec->data, subsec.offset, target->wordSize);
+ size_t dataOff = 0; // Offset from the start of the EH frame.
+ reader.skipValidLength(&dataOff); // readLength() already validated this.
+ // cieOffOff is the offset from the start of the EH frame to the cieOff
+ // value, which is itself an offset from the current PC to a CIE.
+ const size_t cieOffOff = dataOff;
+
+ EhRelocator ehRelocator(isec);
+ auto cieOffRelocIt = llvm::find_if(
+ isec->relocs, [=](const Reloc &r) { return r.offset == cieOffOff; });
+ InputSection *cieIsec = nullptr;
+ if (cieOffRelocIt != isec->relocs.end()) {
+ // We already have an explicit relocation for the CIE offset.
+ cieIsec =
+ getTargetSymbolFromSubtraction</*Invert=*/true>(isec, cieOffRelocIt)
+ ->isec;
+ dataOff += sizeof(uint32_t);
+ } else {
+ // If we haven't found a relocation, then the CIE offset is most likely
+ // embedded in the section data (AKA an "abs-ified" reloc). Parse that
+ // and generate a Reloc struct.
+ uint32_t cieMinuend = reader.readU32(&dataOff);
+ if (cieMinuend == 0)
+ cieIsec = isec;
+ else {
+ uint32_t cieOff = isecOff + dataOff - cieMinuend;
+ cieIsec = findContainingSubsection(ehFrameSection, &cieOff);
+ if (cieIsec == nullptr)
+ fatal("failed to find CIE");
+ }
+ if (cieIsec != isec)
+ ehRelocator.makeNegativePcRel(cieOffOff, cieIsec->symbols[0],
+ /*length=*/2);
+ }
+ if (cieIsec == isec) {
+ cieMap[cieIsec] = parseCIE(isec, reader, dataOff);
+ continue;
+ }
+
+ // Offset of the function address within the EH frame.
+ const size_t funcAddrOff = dataOff;
+ uint64_t funcAddr = reader.readPointer(&dataOff) + ehFrameSection.addr +
+ isecOff + funcAddrOff;
+ uint32_t funcLength = reader.readPointer(&dataOff);
+ size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
+ assert(cieMap.count(cieIsec));
+ const CIE &cie = cieMap[cieIsec];
+ Optional<uint64_t> lsdaAddrOpt;
+ if (cie.fdesHaveAug) {
+ reader.skipLeb128(&dataOff);
+ lsdaAddrOff = dataOff;
+ if (cie.fdesHaveLsda) {
+ uint64_t lsdaOff = reader.readPointer(&dataOff);
+ if (lsdaOff != 0) // FIXME possible to test this?
+ lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
+ }
+ }
+
+ auto funcAddrRelocIt = isec->relocs.end();
+ auto lsdaAddrRelocIt = isec->relocs.end();
+ for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
+ if (it->offset == funcAddrOff)
+ funcAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
+ else if (lsdaAddrOpt && it->offset == lsdaAddrOff)
+ lsdaAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
+ }
+
+ Defined *funcSym;
+ if (funcAddrRelocIt != isec->relocs.end()) {
+ funcSym = getTargetSymbolFromSubtraction(isec, funcAddrRelocIt);
+ } else {
+ funcSym = findSymbolAtAddress(sections, funcAddr);
+ ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize);
+ }
+ // The symbol has been coalesced, or already has a compact unwind entry.
+ if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry) {
+ // We must prune unused FDEs for correctness, so we cannot rely on
+ // -dead_strip being enabled.
+ isec->live = false;
+ continue;
+ }
+
+ InputSection *lsdaIsec = nullptr;
+ if (lsdaAddrRelocIt != isec->relocs.end()) {
+ lsdaIsec = getTargetSymbolFromSubtraction(isec, lsdaAddrRelocIt)->isec;
+ } else if (lsdaAddrOpt) {
+ uint64_t lsdaAddr = *lsdaAddrOpt;
+ Section *sec = findContainingSection(sections, &lsdaAddr);
+ lsdaIsec =
+ cast<ConcatInputSection>(findContainingSubsection(*sec, &lsdaAddr));
+ ehRelocator.makePcRel(lsdaAddrOff, lsdaIsec, target->p2WordSize);
+ }
+
+ fdes[isec] = {funcLength, cie.personalitySymbol, lsdaIsec};
+ funcSym->unwindEntry = isec;
+ ehRelocator.commit();
+ }
+}
+
// The path can point to either a dylib or a .tbd file.
static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
Optional<MemoryBufferRef> mbref = readFile(path);
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index b33e510a69016..3e66a791d9b89 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -60,7 +60,8 @@ struct Subsection {
using Subsections = std::vector<Subsection>;
class InputFile;
-struct Section {
+class Section {
+public:
InputFile *file;
StringRef segname;
StringRef name;
@@ -76,6 +77,13 @@ struct Section {
Section &operator=(const Section &) = delete;
Section(Section &&) = delete;
Section &operator=(Section &&) = delete;
+
+private:
+ // Whether we have already split this section into individual subsections.
+ // For sections that cannot be split (e.g. literal sections), this is always
+ // false.
+ bool doneSplitting = false;
+ friend class ObjFile;
};
// Represents a call graph profile edge.
@@ -135,6 +143,12 @@ class InputFile {
static int idCount;
};
+struct FDE { // Value type of ObjFile::fdes, which is keyed by ConcatInputSection.
+ uint32_t funcLength; // Copied into the compact unwind entry's functionLength.
+ Symbol *personality; // Copied into the compact unwind entry's personality.
+ InputSection *lsda; // nullptr when no LSDA was found for the FDE.
+};
+
// .o file
class ObjFile final : public InputFile {
public:
@@ -146,10 +160,11 @@ class ObjFile final : public InputFile {
static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
llvm::DWARFUnit *compileUnit = nullptr;
+ Section *addrSigSection = nullptr;
const uint32_t modTime;
std::vector<ConcatInputSection *> debugSections;
std::vector<CallGraphEntry> callGraph;
- Section *addrSigSection = nullptr;
+ llvm::DenseMap<ConcatInputSection *, FDE> fdes;
private:
template <class LP> void parseLazy();
@@ -164,7 +179,9 @@ class ObjFile final : public InputFile {
void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
const SectionHeader &, Section &);
void parseDebugInfo();
+ void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
void registerCompactUnwind(Section &compactUnwindSection);
+ void registerEhFrames(Section &ehFrameSection);
};
// command-line -sectcreate file
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index bd5978b7c32e3..444a1e9b17c1a 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -268,6 +268,11 @@ bool macho::isClassRefsSection(const InputSection *isec) {
isec->getSegName() == segment_names::data;
}
+bool macho::isEhFrameSection(const InputSection *isec) {
+ return isec->getName() == section_names::ehFrame && // section name must match...
+ isec->getSegName() == segment_names::text; // ...and so must the segment name
+}
+
std::string lld::toString(const InputSection *isec) {
return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str();
}
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index e0184431d6d05..ad8404d0a64bb 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -273,6 +273,7 @@ inline bool isWordLiteralSection(uint32_t flags) {
bool isCodeSection(const InputSection *);
bool isCfStringSection(const InputSection *);
bool isClassRefsSection(const InputSection *);
+bool isEhFrameSection(const InputSection *);
extern std::vector<ConcatInputSection *> inputSections;
diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h
index b82ca4ebebccf..3c134d55cb201 100644
--- a/lld/MachO/Relocations.h
+++ b/lld/MachO/Relocations.h
@@ -61,6 +61,13 @@ struct Reloc {
// gives the destination that this relocation refers to.
int64_t addend = 0;
llvm::PointerUnion<Symbol *, InputSection *> referent = nullptr;
+
+ Reloc() = default;
+
+ Reloc(uint8_t type, bool pcrel, uint8_t length, uint32_t offset,
+ int64_t addend, llvm::PointerUnion<Symbol *, InputSection *> referent)
+ : type(type), pcrel(pcrel), length(length), offset(offset),
+ addend(addend), referent(referent) {} // Member-wise init only; empty body.
};
bool validateSymbolRelocation(const Symbol *, const InputSection *,
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 7dbab691c564d..227244711c08e 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -183,6 +183,7 @@ class Defined : public Symbol {
uint64_t value;
// size is only calculated for regular (non-bitcode) symbols.
uint64_t size;
+ // This can be a subsection of either __compact_unwind or __eh_frame.
ConcatInputSection *unwindEntry = nullptr;
};
diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index b56497a99b0b5..e66a6966b59d7 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -14,6 +14,7 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
@@ -37,6 +38,7 @@ class TargetInfo {
pageZeroSize = LP::pageZeroSize;
headerSize = sizeof(typename LP::mach_header);
wordSize = LP::wordSize;
+ p2WordSize = llvm::CTLog2<LP::wordSize>();
}
virtual ~TargetInfo() = default;
@@ -85,12 +87,17 @@ class TargetInfo {
size_t stubSize;
size_t stubHelperHeaderSize;
size_t stubHelperEntrySize;
+ uint8_t p2WordSize;
size_t wordSize;
size_t thunkSize = 0;
uint64_t forwardBranchRange = 0;
uint64_t backwardBranchRange = 0;
+ uint32_t modeDwarfEncoding;
+ uint8_t subtractorRelocType;
+ uint8_t unsignedRelocType;
+
// We contrive this value as sufficiently far from any valid address that it
// will always be out-of-range for any architecture. UINT64_MAX is not a
// good choice because it is (a) only 1 away from wrapping to 0, and (b) the
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 7aa0e8253c768..1d6d658f97888 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -28,6 +28,7 @@
using namespace llvm;
using namespace llvm::MachO;
+using namespace llvm::support::endian;
using namespace lld;
using namespace lld::macho;
@@ -222,7 +223,8 @@ void UnwindInfoSectionImpl::prepareRelocations() {
// entries to the GOT. Hence the use of a MapVector for
// UnwindInfoSection::symbols.
for (const Defined *d : make_second_range(symbols))
- if (d->unwindEntry)
+ if (d->unwindEntry &&
+ d->unwindEntry->getName() == section_names::compactUnwind)
prepareRelocations(d->unwindEntry);
}
@@ -331,6 +333,18 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
if (!d->unwindEntry)
return;
+ // If we have DWARF unwind info, create a CU entry that points to it.
+ if (d->unwindEntry->getName() == section_names::ehFrame) {
+ cu.encoding = target->modeDwarfEncoding | d->unwindEntry->outSecOff;
+ const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry];
+ cu.functionLength = fde.funcLength;
+ cu.personality = fde.personality;
+ cu.lsda = fde.lsda;
+ return;
+ }
+
+ assert(d->unwindEntry->getName() == section_names::compactUnwind);
+
auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) -
target->wordSize;
cu.functionLength =
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index a485ba97d8d39..3e9641ba5ef7b 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -950,8 +950,14 @@ template <class LP> void Writer::createOutputSections() {
StringRef segname = it.first.first;
ConcatOutputSection *osec = it.second;
assert(segname != segment_names::ld);
- if (osec->isNeeded())
+ if (osec->isNeeded()) {
+ // See comment in ObjFile::splitEhFrames()
+ if (osec->name == section_names::ehFrame &&
+ segname == segment_names::text)
+ osec->align = target->wordSize;
+
getOrCreateOutputSegment(segname)->addOutputSection(osec);
+ }
}
for (SyntheticSection *ssec : syntheticSections) {
diff --git a/lld/test/MachO/Inputs/eh-frame-x86_64-r.o b/lld/test/MachO/Inputs/eh-frame-x86_64-r.o
new file mode 100644
index 0000000000000..d98e14ec81bff
Binary files /dev/null and b/lld/test/MachO/Inputs/eh-frame-x86_64-r.o differ
diff --git a/lld/test/MachO/eh-frame.s b/lld/test/MachO/eh-frame.s
new file mode 100644
index 0000000000000..24fd7bfc93f97
--- /dev/null
+++ b/lld/test/MachO/eh-frame.s
@@ -0,0 +1,161 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; mkdir %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %s -o %t/eh-frame-x86_64.o
+# RUN: %lld -lSystem -lc++ %t/eh-frame-x86_64.o -o %t/eh-frame-x86_64
+# RUN: llvm-objdump --macho --syms --indirect-symbols --unwind-info \
+# RUN: --dwarf=frames %t/eh-frame-x86_64 | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=4
+# RUN: llvm-nm -m %t/eh-frame-x86_64 | FileCheck %s --check-prefix NO-EH-SYMS
+# RUN: llvm-readobj --section-headers %t/eh-frame-x86_64 | FileCheck %s --check-prefix=ALIGN -D#ALIGN=3
+
+## Test that we correctly handle the output of `ld -r`, which emits EH frames
+## using subtractor relocations instead of implicitly encoding the offsets.
+## In order to keep this test cross-platform, we check in ld64's output rather
+## than invoking ld64 directly. NOTE: whenever this test is updated, the
+## checked-in copy of `ld -r`'s output should be updated too!
+# COM: ld -r %t/eh-frame-x86_64.o -o %S/Inputs/eh-frame-x86_64-r.o
+# RUN: %lld -lSystem -lc++ %S/Inputs/eh-frame-x86_64-r.o -o %t/eh-frame-x86_64-r
+# RUN: llvm-objdump --macho --syms --indirect-symbols --unwind-info \
+# RUN: --dwarf=frames %t/eh-frame-x86_64-r | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=4
+# RUN: llvm-nm -m %t/eh-frame-x86_64-r | FileCheck %s --check-prefix NO-EH-SYMS
+# RUN: llvm-readobj --section-headers %t/eh-frame-x86_64-r | FileCheck %s --check-prefix=ALIGN -D#ALIGN=3
+
+# ALIGN: Name: __eh_frame
+# ALIGN-NEXT: Segment: __TEXT
+# ALIGN-NEXT: Address:
+# ALIGN-NEXT: Size:
+# ALIGN-NEXT: Offset:
+# ALIGN-NEXT: Alignment: [[#ALIGN]]
+
+# NO-EH-SYMS-NOT: __eh_frame
+
+# CHECK: Indirect symbols for (__DATA_CONST,__got) 2 entries
+# CHECK: address index name
+# CHECK: 0x[[#%x,GXX_PERSONALITY_GOT:]] {{.*}} ___gxx_personality_v0
+# CHECK: 0x[[#%x,MY_PERSONALITY_GOT:]]
+# CHECK: SYMBOL TABLE:
+# CHECK-DAG: [[#%x,F:]] l F __TEXT,__text _f
+# CHECK-DAG: [[#%x,NO_UNWIND:]] l F __TEXT,__text _no_unwind
+# CHECK-DAG: [[#%x,G:]] l F __TEXT,__text _g
+# CHECK-DAG: [[#%x,H:]] l F __TEXT,__text _h
+# CHECK-DAG: [[#%x,EXCEPT0:]] l O __TEXT,__gcc_except_tab GCC_except_table0
+# CHECK-DAG: [[#%x,EXCEPT1:]] l O __TEXT,__gcc_except_tab GCC_except_table1
+# CHECK-DAG: [[#%x,EXCEPT2:]] l O __TEXT,custom_except custom_except_table2
+# CHECK-DAG: [[#%x,MY_PERSONALITY:]] g F __TEXT,__text _my_personality
+# CHECK: Contents of __unwind_info section:
+# CHECK: Version: 0x1
+# CHECK: Number of personality functions in array: 0x2
+# CHECK: Number of indices in array: 0x2
+# CHECK: Personality functions: (count = 2)
+# CHECK: personality[1]: 0x[[#%.8x,GXX_PERSONALITY_GOT - BASE]]
+# CHECK: personality[2]: 0x[[#%.8x,MY_PERSONALITY_GOT - BASE]]
+# CHECK: LSDA descriptors:
+# CHECK: [0]: function offset=0x[[#%.8x,F - BASE]], LSDA offset=0x[[#%.8x,EXCEPT0 - BASE]]
+# CHECK: [1]: function offset=0x[[#%.8x,G - BASE]], LSDA offset=0x[[#%.8x,EXCEPT1 - BASE]]
+# CHECK: [2]: function offset=0x[[#%.8x,H - BASE]], LSDA offset=0x[[#%.8x,EXCEPT2 - BASE]]
+# CHECK: Second level indices:
+# CHECK: Second level index[0]:
+# CHECK: [0]: function offset=0x[[#%.8x,F - BASE]], encoding[{{.*}}]=0x52{{.*}}
+# CHECK: [1]: function offset=0x[[#%.8x,NO_UNWIND - BASE]], encoding[{{.*}}]=0x00000000
+# CHECK: [2]: function offset=0x[[#%.8x,G - BASE]], encoding[{{.*}}]=0x1[[#%x,DWARF_ENC]][[#%.6x, G_DWARF_OFF:]]
+# CHECK: [3]: function offset=0x[[#%.8x,H - BASE]], encoding[{{.*}}]=0x2[[#%x,DWARF_ENC]][[#%.6x, H_DWARF_OFF:]]
+# CHECK: [4]: function offset=0x[[#%.8x,MY_PERSONALITY - BASE]], encoding[{{.*}}]=0x00000000
+
+# CHECK: .debug_frame contents:
+# CHECK: .eh_frame contents:
+
+# CHECK: [[#%.8x,CIE1_OFF:]] {{.*}} CIE
+# CHECK: Format: DWARF32
+# CHECK: Version: 1
+# CHECK: Augmentation: "zPLR"
+# CHECK: Code alignment factor: 1
+# CHECK: Data alignment factor: -8
+# CHECK: Return address column:
+# CHECK: Personality Address: [[#%.16x,GXX_PERSONALITY_GOT]]
+# CHECK: Augmentation data: 9B {{(([[:xdigit:]]{2} ){4})}}10 10
+
+# CHECK: [[#%.8x,G_DWARF_OFF]] {{.*}} [[#%.8x,G_DWARF_OFF + 4 - CIE1_OFF]] FDE cie=[[#CIE1_OFF]] pc=[[#%x,G]]
+# CHECK: Format: DWARF32
+# CHECK: LSDA Address: [[#%.16x,EXCEPT1]]
+# CHECK: DW_CFA_def_cfa_offset: +8
+# CHECK: 0x[[#%x,G]]:
+
+# CHECK: [[#%.8x,CIE2_OFF:]] {{.*}} CIE
+# CHECK: Format: DWARF32
+# CHECK: Version: 1
+# CHECK: Augmentation: "zPLR"
+# CHECK: Code alignment factor: 1
+# CHECK: Data alignment factor: -8
+# CHECK: Return address column:
+# CHECK: Personality Address: [[#%.16x,MY_PERSONALITY_GOT]]
+# CHECK: Augmentation data: 9B {{(([[:xdigit:]]{2} ){4})}}10 10
+
+# CHECK: [[#%.8x,H_DWARF_OFF]] {{.*}} [[#%.8x,H_DWARF_OFF + 4 - CIE2_OFF]] FDE cie=[[#CIE2_OFF]] pc=[[#%x,H]]
+# CHECK: Format: DWARF32
+# CHECK: LSDA Address: [[#%.16x,EXCEPT2]]
+# CHECK: DW_CFA_def_cfa_offset: +8
+# CHECK: 0x[[#%x,H]]:
+
+.globl _my_personality, _main
+
+.text
+## _f's unwind info can be encoded with compact unwind, so we shouldn't see an
+## FDE entry for it in the output file.
+.p2align 2
+_f:
+ .cfi_startproc
+ .cfi_personality 155, ___gxx_personality_v0
+ .cfi_lsda 16, Lexception0
+ .cfi_def_cfa_offset 8
+ ret
+ .cfi_endproc
+
+.p2align 2
+_no_unwind:
+ ret
+
+.p2align 2
+_g:
+ .cfi_startproc
+ .cfi_personality 155, ___gxx_personality_v0
+ .cfi_lsda 16, Lexception1
+ .cfi_def_cfa_offset 8
+ ## cfi_escape cannot be encoded in compact unwind, so we must keep _g's FDE
+ .cfi_escape 0x2e, 0x10
+ ret
+ .cfi_endproc
+
+.p2align 2
+_h:
+ .cfi_startproc
+ .cfi_personality 155, _my_personality
+ .cfi_lsda 16, Lexception2
+ .cfi_def_cfa_offset 8
+ ## cfi_escape cannot be encoded in compact unwind, so we must keep _h's FDE
+ .cfi_escape 0x2e, 0x10
+ ret
+ .cfi_endproc
+
+.p2align 2
+_my_personality:
+ ret
+
+.p2align 2
+_main:
+ ret
+
+.section __TEXT,__gcc_except_tab
+GCC_except_table0:
+Lexception0:
+ .byte 255
+
+GCC_except_table1:
+Lexception1:
+ .byte 255
+
+.section __TEXT,custom_except
+custom_except_table2:
+Lexception2:
+ .byte 255
+
+.subsections_via_symbols
diff --git a/lld/test/MachO/invalid/eh-frame.s b/lld/test/MachO/invalid/eh-frame.s
new file mode 100644
index 0000000000000..0e51975bfa4b8
--- /dev/null
+++ b/lld/test/MachO/invalid/eh-frame.s
@@ -0,0 +1,83 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/too-small-1.s -o %t/too-small-1.o
+# RUN: not %lld -lSystem -dylib %t/too-small-1.o -o /dev/null 2>&1 | FileCheck %s --check-prefix TOO-SMALL-1
+# TOO-SMALL-1: error: {{.*}}too-small-1.o:(__eh_frame+0x0): CIE/FDE too small
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/too-small-2.s -o %t/too-small-2.o
+# RUN: not %lld -lSystem -dylib %t/too-small-2.o -o /dev/null 2>&1 | FileCheck %s --check-prefix TOO-SMALL-2
+# TOO-SMALL-2: error: {{.*}}too-small-2.o:(__eh_frame+0x0): CIE/FDE extends past the end of the section
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/personality-enc.s -o %t/personality-enc.o
+# RUN: not %lld -lSystem -dylib %t/personality-enc.o -o /dev/null 2>&1 | FileCheck %s --check-prefix PERS-ENC
+# PERS-ENC: error: {{.*}}personality-enc.o:(__eh_frame+0x12): unexpected personality encoding 0xb
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/pointer-enc.s -o %t/pointer-enc.o
+# RUN: not %lld -lSystem -dylib %t/pointer-enc.o -o /dev/null 2>&1 | FileCheck %s --check-prefix PTR-ENC
+# PTR-ENC: error: {{.*}}pointer-enc.o:(__eh_frame+0x11): unexpected pointer encoding 0x12
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/string-err.s -o %t/string-err.o
+# RUN: not %lld -lSystem -dylib %t/string-err.o -o /dev/null 2>&1 | FileCheck %s --check-prefix STR
+# STR: error: {{.*}}string-err.o:(__eh_frame+0x9): corrupted CIE (failed to read string)
+
+#--- too-small-1.s
+.p2align 3
+.section __TEXT,__eh_frame
+.short 0x3
+
+.subsections_via_symbols
+
+#--- too-small-2.s
+.p2align 3
+.section __TEXT,__eh_frame
+.long 0x3 # length
+
+.subsections_via_symbols
+
+#--- personality-enc.s
+.p2align 3
+.section __TEXT,__eh_frame
+
+.long 0x14 # length
+.long 0 # CIE offset
+.byte 1 # version
+.asciz "zPR" # aug string
+.byte 0x01 # code alignment
+.byte 0x78 # data alignment
+.byte 0x10 # return address register
+.byte 0x01 # aug length
+.byte 0x0b # personality encoding
+.long 0xffff # personality pointer
+.byte 0x10 # pointer encoding
+.space 1 # pad to alignment
+
+.subsections_via_symbols
+
+#--- pointer-enc.s
+.p2align 3
+.section __TEXT,__eh_frame
+
+.long 0x14 # length
+.long 0 # CIE offset
+.byte 1 # version
+.asciz "zR" # aug string
+.byte 0x01 # code alignment
+.byte 0x78 # data alignment
+.byte 0x10 # return address register
+.byte 0x01 # aug length
+.byte 0x12 # pointer encoding
+.space 7 # pad to alignment
+
+.subsections_via_symbols
+
+#--- string-err.s
+.p2align 3
+.section __TEXT,__eh_frame
+
+.long 0x7 # length
+.long 0 # CIE offset
+.byte 1 # version
+.ascii "zR" # invalid aug string
+
+.subsections_via_symbols
diff --git a/lld/test/MachO/obj-file-with-stabs.s b/lld/test/MachO/obj-file-with-stabs.s
index 134a748a7ec2a..fd558e8f3e8cc 100644
--- a/lld/test/MachO/obj-file-with-stabs.s
+++ b/lld/test/MachO/obj-file-with-stabs.s
@@ -1,27 +1,33 @@
# REQUIRES: x86
-## FIXME: This yaml is from an object file produced with 'ld -r'
-## Replace this with "normal" .s test format once lld supports `-r`
+## FIXME: This yaml is from an object file produced with 'ld -r':
+##
+## echo "int main() {return 1;}" > test.c
+## clang -c -g -o test.o test.c
+## ld -r -o test2.o test.o -no_data_in_code_info
+##
+## Replace this with "normal" .s test format once lld supports `-r`
# RUN: yaml2obj %s -o %t.o
-# RUN: %lld -lSystem -platform_version macos 11.3 11.0 -arch x86_64 %t.o -o %t
+# RUN: %lld -lSystem -arch x86_64 %t.o -o %t
+
--- !mach-o
FileHeader:
magic: 0xFEEDFACF
- cputype: 0x01000007
- cpusubtype: 0x00000003
- filetype: 0x00000001
- ncmds: 2
- sizeofcmds: 384
- flags: 0x00002000
- reserved: 0x00000000
+ cputype: 0x1000007
+ cpusubtype: 0x3
+ filetype: 0x1
+ ncmds: 3
+ sizeofcmds: 288
+ flags: 0x2000
+ reserved: 0x0
LoadCommands:
- cmd: LC_SEGMENT_64
- cmdsize: 312
+ cmdsize: 232
segname: ''
vmaddr: 0
- vmsize: 120
- fileoff: 448
- filesize: 120
+ vmsize: 56
+ fileoff: 352
+ filesize: 56
maxprot: 7
initprot: 7
nsects: 2
@@ -29,57 +35,33 @@ LoadCommands:
Sections:
- sectname: __text
segname: __TEXT
- addr: 0x0000000000000000
+ addr: 0x0
size: 18
- offset: 0x000001C0
+ offset: 0x160
align: 4
- reloff: 0x00000000
+ reloff: 0x0
nreloc: 0
flags: 0x80000400
- reserved1: 0x00000000
- reserved2: 0x00000000
- reserved3: 0x00000000
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
content: 554889E5C745FC00000000B8010000005DC3
- - sectname: __eh_frame
- segname: __TEXT
- addr: 0x0000000000000018
- size: 64
- offset: 0x000001D8
+ - sectname: __compact_unwind
+ segname: __LD
+ addr: 0x18
+ size: 32
+ offset: 0x178
align: 3
- reloff: 0x00000238
- nreloc: 4
- flags: 0x00000000
- reserved1: 0x00000000
- reserved2: 0x00000000
- reserved3: 0x00000000
- content: 1400000000000000017A520001781001100C0708900100002400000004000000F8FFFFFFFFFFFFFF120000000000000000410E108602430D0600000000000000
+ reloff: 0x198
+ nreloc: 1
+ flags: 0x2000000
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: '0000000000000000120000000000000100000000000000000000000000000000'
relocations:
- - address: 0x0000001C
- symbolnum: 0
- pcrel: false
- length: 2
- extern: true
- type: 5
- scattered: false
- value: 0
- - address: 0x0000001C
- symbolnum: 1
- pcrel: false
- length: 2
- extern: true
- type: 0
- scattered: false
- value: 0
- - address: 0x00000020
- symbolnum: 1
- pcrel: false
- length: 3
- extern: true
- type: 5
- scattered: false
- value: 0
- - address: 0x00000020
- symbolnum: 10
+ - address: 0x0
+ symbolnum: 8
pcrel: false
length: 3
extern: true
@@ -88,39 +70,72 @@ LoadCommands:
value: 0
- cmd: LC_SYMTAB
cmdsize: 24
- symoff: 608
- nsyms: 11
- stroff: 784
- strsize: 72
+ symoff: 416
+ nsyms: 9
+ stroff: 560
+ strsize: 48
+ - cmd: LC_BUILD_VERSION
+ cmdsize: 32
+ platform: 1
+ minos: 659200
+ sdk: 0
+ ntools: 1
+ Tools:
+ - tool: 3
+ version: 46596096
LinkEditData:
NameList:
- - n_strx: 8 ## N_STAB sym (in got)
- n_type: 0x0E
- n_sect: 2
+ - n_strx: 8
+ n_type: 0x64 ## N_SO STAB
+ n_sect: 0
n_desc: 0
- n_value: 24
- - n_strx: 18
- n_type: 0x0E
- n_sect: 2
+ n_value: 0
+ - n_strx: 14
+ n_type: 0x64 ## N_SO STAB
+ n_sect: 0
+ n_desc: 0
+ n_value: 0
+ - n_strx: 21
+ n_type: 0x66 ## N_OSO STAB
+ n_sect: 3
+ n_desc: 1
+ n_value: 1651001352
+ - n_strx: 1
+ n_type: 0x2E ## N_BNSYM STAB
+ n_sect: 1
+ n_desc: 0
+ n_value: 0
+ - n_strx: 41
+ n_type: 0x24 ## N_FUN STAB
+ n_sect: 1
n_desc: 0
- n_value: 48
+ n_value: 0
+ - n_strx: 1
+ n_type: 0x24 ## N_FUN STAB
+ n_sect: 0
+ n_desc: 0
+ n_value: 18
- n_strx: 1
- n_type: 0x4E
+ n_type: 0x4E ## N_ENSYM STAB
n_sect: 1
n_desc: 0
n_value: 18
- - n_strx: 2 ## _main
- n_type: 0x0F
+ - n_strx: 1
+ n_type: 0x64 ## N_SO STAB
+ n_sect: 1
+ n_desc: 0
+ n_value: 0
+ - n_strx: 2
+ n_type: 0xF
n_sect: 1
n_desc: 0
n_value: 0
StringTable:
- ' '
- _main
- - EH_Frame1
- - func.eh
- - '/Users/vyng/'
- - test.cc
- - '/Users/vyng/test.o'
+ - '/tmp/'
+ - test.c
+ - '/private/tmp/test.o'
- _main
+ - ''
...
diff --git a/lld/test/MachO/tools/generate-cfi-funcs.py b/lld/test/MachO/tools/generate-cfi-funcs.py
index a91eab3eeac6b..c8af2c4876f4a 100755
--- a/lld/test/MachO/tools/generate-cfi-funcs.py
+++ b/lld/test/MachO/tools/generate-cfi-funcs.py
@@ -24,9 +24,6 @@ def print_function(name):
have_lsda = (random.random() < lsda_odds)
frame_size = random.randint(4, 64) * 16
frame_offset = -random.randint(0, (frame_size/16 - 4)) * 16
- reg_count = random.randint(0, 5)
- reg_combo = random.randint(0, factorial(reg_count) - 1)
- regs_saved = saved_regs_combined[reg_count][reg_combo]
global func_size_low, func_size_high
func_size = random.randint(func_size_low, func_size_high) * 0x10
func_size_high += 1
@@ -34,13 +31,13 @@ def print_function(name):
func_size_low += 1
print("""\
-### %s regs=%d frame=%d lsda=%s size=%d
+### %s frame=%d lsda=%s size=%d
.section __TEXT,__text,regular,pure_instructions
.p2align 4, 0x90
.globl %s
%s:
.cfi_startproc""" % (
- name, reg_count, frame_size, have_lsda, func_size, name, name))
+ name, frame_size, have_lsda, func_size, name, name))
if have_lsda:
global lsda_n
lsda_n += 1
@@ -53,8 +50,6 @@ def print_function(name):
.cfi_offset %%rbp, %d
movq %%rsp, %%rbp
.cfi_def_cfa_register %%rbp""" % (frame_size, frame_offset + 6*8))
- for i in range(reg_count):
- print(".cfi_offset %s, %d" % (regs_saved[i], frame_offset+(i*8)))
print("""\
.fill %d
popq %%rbp
More information about the llvm-commits
mailing list