[lld] a8843ec - [lld-macho] Parallelize linker optimization hint processing
Daniel Bertalan via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 16 08:39:22 PDT 2022
Author: Daniel Bertalan
Date: 2022-09-16T17:38:46+02:00
New Revision: a8843ec95295a927d3f31719cec4b6bcefb90844
URL: https://github.com/llvm/llvm-project/commit/a8843ec95295a927d3f31719cec4b6bcefb90844
DIFF: https://github.com/llvm/llvm-project/commit/a8843ec95295a927d3f31719cec4b6bcefb90844.diff
LOG: [lld-macho] Parallelize linker optimization hint processing
This commit moves the parsing of linker optimization hints into
`ARM64::applyOptimizationHints`. This lets us avoid allocating memory
for holding the parsed information, and moves work out of
`ObjFile::parse`, which is not parallelized at the moment.
This change reduces the overhead of processing LOHs to 25-30 ms when
linking Chromium Framework on my M1 machine; previously it took close to
100 ms.
There's no statistically significant change in runtime for a --threads=1
link.
Performance figures with all 8 cores utilized:
N Min Max Median Avg Stddev
x 20 3.8027232 3.8760762 3.8505335 3.8454145 0.026352574
+ 20 3.7019017 3.8660538 3.7546209 3.7620371 0.032680043
Difference at 95.0% confidence
-0.0833775 +/- 0.019
-2.16823% +/- 0.494094%
(Student's t, pooled s = 0.0296854)
Differential Revision: https://reviews.llvm.org/D133439
Added:
Modified:
lld/MachO/Arch/ARM64.cpp
lld/MachO/InputFiles.cpp
lld/MachO/InputFiles.h
lld/MachO/InputSection.cpp
lld/MachO/InputSection.h
lld/MachO/Relocations.h
lld/MachO/Target.h
lld/MachO/Writer.cpp
lld/test/MachO/invalid/invalid-loh.s
lld/test/MachO/loh-adrp-adrp.s
Removed:
################################################################################
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 2041de6561c32..241b3f557b45d 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -40,8 +41,7 @@ struct ARM64 : ARM64Common {
uint64_t selectorIndex, uint64_t gotAddr,
uint64_t msgSendIndex) const override;
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
- void applyOptimizationHints(uint8_t *,
- const ConcatInputSection *) const override;
+ void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
};
} // namespace
@@ -196,23 +196,6 @@ struct Ldr {
ExtendType extendType;
int64_t offset;
};
-
-class OptimizationHintContext {
-public:
- OptimizationHintContext(uint8_t *buf, const ConcatInputSection *isec)
- : buf(buf), isec(isec) {}
-
- void applyAdrpAdd(uint64_t, uint64_t);
- void applyAdrpAdrp(uint64_t, uint64_t);
- void applyAdrpLdr(uint64_t, uint64_t);
- void applyAdrpLdrGot(uint64_t, uint64_t);
- void applyAdrpAddLdr(uint64_t, uint64_t, uint64_t);
- void applyAdrpLdrGotLdr(uint64_t, uint64_t, uint64_t);
-
-private:
- uint8_t *buf;
- const ConcatInputSection *isec;
-};
} // namespace
static bool parseAdrp(uint32_t insn, Adrp &adrp) {
@@ -347,7 +330,8 @@ static void writeImmediateLdr(void *loc, const Ldr &ldr) {
// ->
// adr xM, _foo
// nop
-void OptimizationHintContext::applyAdrpAdd(uint64_t offset1, uint64_t offset2) {
+static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
+ uint64_t offset1, uint64_t offset2) {
uint32_t ins1 = read32le(buf + offset1);
uint32_t ins2 = read32le(buf + offset2);
Adrp adrp;
@@ -375,8 +359,8 @@ void OptimizationHintContext::applyAdrpAdd(uint64_t offset1, uint64_t offset2) {
// ->
// adrp xN, _foo@PAGE
// nop
-void OptimizationHintContext::applyAdrpAdrp(uint64_t offset1,
- uint64_t offset2) {
+static void applyAdrpAdrp(uint8_t *buf, const ConcatInputSection *isec,
+ uint64_t offset1, uint64_t offset2) {
uint32_t ins1 = read32le(buf + offset1);
uint32_t ins2 = read32le(buf + offset2);
Adrp adrp1, adrp2;
@@ -402,7 +386,8 @@ void OptimizationHintContext::applyAdrpAdrp(uint64_t offset1,
// ->
// nop
// ldr xM, _foo
-void OptimizationHintContext::applyAdrpLdr(uint64_t offset1, uint64_t offset2) {
+static void applyAdrpLdr(uint8_t *buf, const ConcatInputSection *isec,
+ uint64_t offset1, uint64_t offset2) {
uint32_t ins1 = read32le(buf + offset1);
uint32_t ins2 = read32le(buf + offset2);
Adrp adrp;
@@ -426,15 +411,15 @@ void OptimizationHintContext::applyAdrpLdr(uint64_t offset1, uint64_t offset2) {
// GOT loads are emitted by the compiler as a pair of adrp and ldr instructions,
// but they may be changed to adrp+add by relaxGotLoad(). This hint performs
// the AdrpLdr or AdrpAdd transformation depending on whether it was relaxed.
-void OptimizationHintContext::applyAdrpLdrGot(uint64_t offset1,
- uint64_t offset2) {
+static void applyAdrpLdrGot(uint8_t *buf, const ConcatInputSection *isec,
+ uint64_t offset1, uint64_t offset2) {
uint32_t ins2 = read32le(buf + offset2);
Add add;
Ldr ldr;
if (parseAdd(ins2, add))
- applyAdrpAdd(offset1, offset2);
+ applyAdrpAdd(buf, isec, offset1, offset2);
else if (parseLdr(ins2, ldr))
- applyAdrpLdr(offset1, offset2);
+ applyAdrpLdr(buf, isec, offset1, offset2);
}
// Optimizes an adrp+add+ldr sequence used for loading from a local symbol's
@@ -444,9 +429,9 @@ void OptimizationHintContext::applyAdrpLdrGot(uint64_t offset1,
// adrp x0, _foo@PAGE
// add x1, x0, _foo@PAGEOFF
// ldr x2, [x1, #off]
-void OptimizationHintContext::applyAdrpAddLdr(uint64_t offset1,
- uint64_t offset2,
- uint64_t offset3) {
+static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
+ uint64_t offset1, uint64_t offset2,
+ uint64_t offset3) {
uint32_t ins1 = read32le(buf + offset1);
Adrp adrp;
if (!parseAdrp(ins1, adrp))
@@ -512,15 +497,15 @@ void OptimizationHintContext::applyAdrpAddLdr(uint64_t offset1,
// the GOT entry can be loaded with a single literal ldr instruction.
// If the referenced symbol is local and thus has been relaxed to adrp+add+ldr,
// we perform the AdrpAddLdr transformation.
-void OptimizationHintContext::applyAdrpLdrGotLdr(uint64_t offset1,
- uint64_t offset2,
- uint64_t offset3) {
+static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
+ uint64_t offset1, uint64_t offset2,
+ uint64_t offset3) {
uint32_t ins2 = read32le(buf + offset2);
Add add;
Ldr ldr2;
if (parseAdd(ins2, add)) {
- applyAdrpAddLdr(offset1, offset2, offset3);
+ applyAdrpAddLdr(buf, isec, offset1, offset2, offset3);
} else if (parseLdr(ins2, ldr2)) {
// adrp x1, _foo@GOTPAGE
// ldr x2, [x1, _foo@GOTPAGEOFF]
@@ -559,47 +544,167 @@ void OptimizationHintContext::applyAdrpLdrGotLdr(uint64_t offset1,
}
}
-void ARM64::applyOptimizationHints(uint8_t *buf,
- const ConcatInputSection *isec) const {
- assert(isec);
+static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) {
+ unsigned int n = 0;
+ uint64_t value = decodeULEB128(ptr, &n, end);
+ ptr += n;
+ return value;
+}
- // Note: Some of these optimizations might not be valid when shared regions
- // are in use. Will need to revisit this if splitSegInfo is added.
+template <typename Callback>
+static void forEachHint(ArrayRef<uint8_t> data, Callback callback) {
+ std::array<uint64_t, 3> args;
- OptimizationHintContext ctx(buf, isec);
- for (const OptimizationHint &hint : isec->optimizationHints) {
- switch (hint.type) {
- case LOH_ARM64_ADRP_ADRP:
- // This is done in another pass because the other optimization hints
- // might cause its targets to be turned into NOPs.
+ for (const uint8_t *p = data.begin(), *end = data.end(); p < end;) {
+ uint64_t type = readValue(p, end);
+ if (type == 0)
+ break;
+
+ uint64_t argCount = readValue(p, end);
+ // All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
+ if (argCount > 3) {
+ for (unsigned i = 0; i < argCount; ++i)
+ readValue(p, end);
+ continue;
+ }
+
+ for (unsigned i = 0; i < argCount; ++i)
+ args[i] = readValue(p, end);
+ callback(type, ArrayRef<uint64_t>(args.data(), argCount));
+ }
+}
+
+// On RISC architectures like arm64, materializing a memory address generally
+// takes multiple instructions. If the referenced symbol is located close enough
+// in memory, fewer instructions are needed.
+//
+// Linker optimization hints record where addresses are computed. After
+// addresses have been assigned, if possible, we change them to a shorter
+// sequence of instructions. The size of the binary is not modified; the
+// eliminated instructions are replaced with NOPs. This still leads to faster
+// code as the CPU can skip over NOPs quickly.
+//
+// LOHs are specified by the LC_LINKER_OPTIMIZATION_HINTS load command, which
+// points to a sequence of ULEB128-encoded numbers. Each entry specifies a
+// transformation kind, and 2 or 3 addresses where the instructions are located.
+void ARM64::applyOptimizationHints(uint8_t *outBuf, const ObjFile &obj) const {
+ ArrayRef<uint8_t> data = obj.getOptimizationHints();
+ if (data.empty())
+ return;
+
+ const ConcatInputSection *section = nullptr;
+ uint64_t sectionAddr = 0;
+ uint8_t *buf = nullptr;
+
+ auto findSection = [&](uint64_t addr) {
+ if (section && addr >= sectionAddr &&
+ addr < sectionAddr + section->getSize())
+ return true;
+
+ auto secIt = std::prev(llvm::upper_bound(
+ obj.sections, addr,
+ [](uint64_t off, const Section *sec) { return off < sec->addr; }));
+ const Section *sec = *secIt;
+
+ auto subsecIt = std::prev(llvm::upper_bound(
+ sec->subsections, addr - sec->addr,
+ [](uint64_t off, Subsection subsec) { return off < subsec.offset; }));
+ const Subsection &subsec = *subsecIt;
+ const ConcatInputSection *isec =
+ dyn_cast_or_null<ConcatInputSection>(subsec.isec);
+ if (!isec || isec->shouldOmitFromOutput())
+ return false;
+
+ section = isec;
+ sectionAddr = subsec.offset + sec->addr;
+ buf = outBuf + section->outSecOff + section->parent->fileOff;
+ return true;
+ };
+
+ auto isValidOffset = [&](uint64_t offset) {
+ if (offset < sectionAddr || offset >= sectionAddr + section->getSize()) {
+ error("linker optimization hint spans multiple sections");
+ return false;
+ }
+ return true;
+ };
+
+ bool hasAdrpAdrp = false;
+ forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) {
+ if (kind == LOH_ARM64_ADRP_ADRP) {
+ hasAdrpAdrp = true;
+ return;
+ }
+
+ if (!findSection(args[0]))
+ return;
+ switch (kind) {
+ case LOH_ARM64_ADRP_ADD:
+ if (isValidOffset(args[1]))
+ applyAdrpAdd(buf, section, args[0] - sectionAddr,
+ args[1] - sectionAddr);
break;
case LOH_ARM64_ADRP_LDR:
- ctx.applyAdrpLdr(hint.offset0, hint.offset0 + hint.delta[0]);
+ if (isValidOffset(args[1]))
+ applyAdrpLdr(buf, section, args[0] - sectionAddr,
+ args[1] - sectionAddr);
+ break;
+ case LOH_ARM64_ADRP_LDR_GOT:
+ if (isValidOffset(args[1]))
+ applyAdrpLdrGot(buf, section, args[0] - sectionAddr,
+ args[1] - sectionAddr);
break;
case LOH_ARM64_ADRP_ADD_LDR:
- ctx.applyAdrpAddLdr(hint.offset0, hint.offset0 + hint.delta[0],
- hint.offset0 + hint.delta[1]);
+ if (isValidOffset(args[1]) && isValidOffset(args[2]))
+ applyAdrpAddLdr(buf, section, args[0] - sectionAddr,
+ args[1] - sectionAddr, args[2] - sectionAddr);
break;
case LOH_ARM64_ADRP_LDR_GOT_LDR:
- ctx.applyAdrpLdrGotLdr(hint.offset0, hint.offset0 + hint.delta[0],
- hint.offset0 + hint.delta[1]);
+ if (isValidOffset(args[1]) && isValidOffset(args[2]))
+ applyAdrpLdrGotLdr(buf, section, args[0] - sectionAddr,
+ args[1] - sectionAddr, args[2] - sectionAddr);
break;
case LOH_ARM64_ADRP_ADD_STR:
case LOH_ARM64_ADRP_LDR_GOT_STR:
// TODO: Implement these
break;
- case LOH_ARM64_ADRP_ADD:
- ctx.applyAdrpAdd(hint.offset0, hint.offset0 + hint.delta[0]);
- break;
- case LOH_ARM64_ADRP_LDR_GOT:
- ctx.applyAdrpLdrGot(hint.offset0, hint.offset0 + hint.delta[0]);
- break;
}
- }
+ });
+
+ if (!hasAdrpAdrp)
+ return;
- for (const OptimizationHint &hint : isec->optimizationHints)
- if (hint.type == LOH_ARM64_ADRP_ADRP)
- ctx.applyAdrpAdrp(hint.offset0, hint.offset0 + hint.delta[0]);
+ // AdrpAdrp optimization hints are performed in a second pass because they
+ // might interfere with other transformations. For instance, consider the
+ // following input:
+ //
+ // adrp x0, _foo@PAGE
+ // add x1, x0, _foo@PAGEOFF
+ // adrp x0, _bar@PAGE
+ // add x2, x0, _bar@PAGEOFF
+ //
+ // If we perform the AdrpAdrp relaxation first, we get:
+ //
+ // adrp x0, _foo@PAGE
+ // add x1, x0, _foo@PAGEOFF
+ // nop
+ // add x2, x0, _bar@PAGEOFF
+ //
+ // If we then apply AdrpAdd to the first two instructions, the add will have a
+ // garbage value in x0:
+ //
+ // adr x1, _foo
+ // nop
+ // nop
+ // add x2, x0, _bar@PAGEOFF
+ forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) {
+ if (kind != LOH_ARM64_ADRP_ADRP)
+ return;
+ if (!findSection(args[0]))
+ return;
+ if (isValidOffset(args[1]))
+ applyAdrpAdrp(buf, section, args[0] - sectionAddr, args[1] - sectionAddr);
+ });
}
TargetInfo *macho::createARM64TargetInfo() {
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 588b87a2927f9..ee382a5aeabc3 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -463,155 +463,6 @@ static Defined *findSymbolAtOffset(const ConcatInputSection *isec,
return *it;
}
-// Linker optimization hints mark a sequence of instructions used for
-// synthesizing an address which that be transformed into a faster sequence. The
-// transformations depend on conditions that are determined at link time, like
-// the distance to the referenced symbol or its alignment.
-//
-// Each hint has a type and refers to 2 or 3 instructions. Each of those
-// instructions must have a corresponding relocation. After addresses have been
-// finalized and relocations have been performed, we check if the requirements
-// hold, and perform the optimizations if they do.
-//
-// Similar linker relaxations exist for ELF as well, with the difference being
-// that the explicit marking allows for the relaxation of non-consecutive
-// relocations too.
-//
-// The specific types of hints are documented in Arch/ARM64.cpp
-void ObjFile::parseOptimizationHints(ArrayRef<uint8_t> data) {
- auto expectedArgCount = [](uint8_t type) {
- switch (type) {
- case LOH_ARM64_ADRP_ADRP:
- case LOH_ARM64_ADRP_LDR:
- case LOH_ARM64_ADRP_ADD:
- case LOH_ARM64_ADRP_LDR_GOT:
- return 2;
- case LOH_ARM64_ADRP_ADD_LDR:
- case LOH_ARM64_ADRP_ADD_STR:
- case LOH_ARM64_ADRP_LDR_GOT_LDR:
- case LOH_ARM64_ADRP_LDR_GOT_STR:
- return 3;
- }
- return -1;
- };
-
- // Each hint contains at least 4 ULEB128-encoded fields, so in the worst case,
- // there are data.size() / 4 LOHs. It's a huge overestimation though, as
- // offsets are unlikely to fall in the 0-127 byte range, so we pre-allocate
- // half as much.
- optimizationHints.reserve(data.size() / 8);
-
- for (const uint8_t *p = data.begin(); p < data.end();) {
- const ptrdiff_t inputOffset = p - data.begin();
- unsigned int n = 0;
- uint8_t type = decodeULEB128(p, &n, data.end());
- p += n;
-
- // An entry of type 0 terminates the list.
- if (type == 0)
- break;
-
- int expectedCount = expectedArgCount(type);
- if (LLVM_UNLIKELY(expectedCount == -1)) {
- error("Linker optimization hint at offset " + Twine(inputOffset) +
- " has unknown type " + Twine(type));
- return;
- }
-
- uint8_t argCount = decodeULEB128(p, &n, data.end());
- p += n;
-
- if (LLVM_UNLIKELY(argCount != expectedCount)) {
- error("Linker optimization hint at offset " + Twine(inputOffset) +
- " has " + Twine(argCount) + " arguments instead of the expected " +
- Twine(expectedCount));
- return;
- }
-
- uint64_t offset0 = decodeULEB128(p, &n, data.end());
- p += n;
-
- int16_t delta[2];
- for (int i = 0; i < argCount - 1; ++i) {
- uint64_t address = decodeULEB128(p, &n, data.end());
- p += n;
- int64_t d = address - offset0;
- if (LLVM_UNLIKELY(d > std::numeric_limits<int16_t>::max() ||
- d < std::numeric_limits<int16_t>::min())) {
- error("Linker optimization hint at offset " + Twine(inputOffset) +
- " has addresses too far apart");
- return;
- }
- delta[i] = d;
- }
-
- optimizationHints.push_back({offset0, {delta[0], delta[1]}, type});
- }
-
- // We sort the per-object vector of optimization hints so each section only
- // needs to hold an ArrayRef to a contiguous range of hints.
- llvm::sort(optimizationHints,
- [](const OptimizationHint &a, const OptimizationHint &b) {
- return a.offset0 < b.offset0;
- });
-
- auto section = sections.begin();
- auto subsection = (*section)->subsections.begin();
- uint64_t subsectionBase = 0;
- uint64_t subsectionEnd = 0;
-
- auto updateAddr = [&]() {
- subsectionBase = (*section)->addr + subsection->offset;
- subsectionEnd = subsectionBase + subsection->isec->getSize();
- };
-
- auto advanceSubsection = [&]() {
- if (section == sections.end())
- return;
- ++subsection;
- while (subsection == (*section)->subsections.end()) {
- ++section;
- if (section == sections.end())
- return;
- subsection = (*section)->subsections.begin();
- }
- };
-
- updateAddr();
- auto hintStart = optimizationHints.begin();
- for (auto hintEnd = hintStart, end = optimizationHints.end(); hintEnd != end;
- ++hintEnd) {
- if (hintEnd->offset0 >= subsectionEnd) {
- subsection->isec->optimizationHints =
- ArrayRef<OptimizationHint>(&*hintStart, hintEnd - hintStart);
-
- hintStart = hintEnd;
- while (hintStart->offset0 >= subsectionEnd) {
- advanceSubsection();
- if (section == sections.end())
- break;
- updateAddr();
- assert(hintStart->offset0 >= subsectionBase);
- }
- }
-
- hintEnd->offset0 -= subsectionBase;
- for (int i = 0, count = expectedArgCount(hintEnd->type); i < count - 1;
- ++i) {
- if (LLVM_UNLIKELY(
- hintEnd->delta[i] < -static_cast<int64_t>(hintEnd->offset0) ||
- hintEnd->delta[i] >=
- static_cast<int64_t>(subsectionEnd - hintEnd->offset0))) {
- error("Linker optimization hint spans multiple sections");
- return;
- }
- }
- }
- if (section != sections.end())
- subsection->isec->optimizationHints = ArrayRef<OptimizationHint>(
- &*hintStart, optimizationHints.end() - hintStart);
-}
-
template <class SectionHeader>
static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec,
relocation_info rel) {
@@ -1129,11 +980,6 @@ template <class LP> void ObjFile::parse() {
if (!sections[i]->subsections.empty())
parseRelocations(sectionHeaders, sectionHeaders[i], *sections[i]);
- if (!config->ignoreOptimizationHints)
- if (auto *cmd = findCommand<linkedit_data_command>(
- hdr, LC_LINKER_OPTIMIZATION_HINT))
- parseOptimizationHints({buf + cmd->dataoff, cmd->datasize});
-
parseDebugInfo();
Section *ehFrameSection = nullptr;
@@ -1213,6 +1059,14 @@ ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
c->datasize / sizeof(data_in_code_entry)};
}
+ArrayRef<uint8_t> ObjFile::getOptimizationHints() const {
+ const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
+ if (auto *cmd =
+ findCommand<linkedit_data_command>(buf, LC_LINKER_OPTIMIZATION_HINT))
+ return {buf + cmd->dataoff, cmd->datasize};
+ return {};
+}
+
// Create pointers from symbols to their associated compact unwind entries.
void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
for (const Subsection &subsection : compactUnwindSection.subsections) {
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 1b454f98932a8..b883bd040f889 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -159,6 +159,7 @@ class ObjFile final : public InputFile {
ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
bool lazy = false, bool forceHidden = false);
ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
+ ArrayRef<uint8_t> getOptimizationHints() const;
template <class LP> void parse();
static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
@@ -176,7 +177,6 @@ class ObjFile final : public InputFile {
std::vector<ConcatInputSection *> debugSections;
std::vector<CallGraphEntry> callGraph;
llvm::DenseMap<ConcatInputSection *, FDE> fdes;
- std::vector<OptimizationHint> optimizationHints;
std::vector<AliasSymbol *> aliases;
private:
@@ -193,7 +193,6 @@ class ObjFile final : public InputFile {
void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
const SectionHeader &, Section &);
void parseDebugInfo();
- void parseOptimizationHints(ArrayRef<uint8_t> data);
void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
void registerCompactUnwind(Section &compactUnwindSection);
void registerEhFrames(Section &ehFrameSection);
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index f6a03649985e0..660a27c3d3179 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -29,8 +29,8 @@ using namespace lld::macho;
// Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
// can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
// so account for that.
-static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) ==
- sizeof(std::vector<Reloc>) + 104,
+static_assert(sizeof(void *) != 8 ||
+ sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 88,
"Try to minimize ConcatInputSection's size, we create many "
"instances of it");
@@ -219,8 +219,6 @@ void ConcatInputSection::writeTo(uint8_t *buf) {
}
target->relocateOne(loc, r, referentVA, getVA() + r.offset);
}
-
- target->applyOptimizationHints(buf, this);
}
ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName,
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 2b8369142b802..ecb46f926a0a5 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -83,7 +83,6 @@ class InputSection {
OutputSection *parent = nullptr;
ArrayRef<uint8_t> data;
std::vector<Reloc> relocs;
- ArrayRef<OptimizationHint> optimizationHints;
// The symbols that belong to this InputSection, sorted by value. With
// .subsections_via_symbols, there is typically only one element here.
llvm::TinyPtrVector<Defined *> symbols;
diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h
index d0eba4643bd07..023d25a795a0d 100644
--- a/lld/MachO/Relocations.h
+++ b/lld/MachO/Relocations.h
@@ -69,14 +69,6 @@ struct Reloc {
addend(addend), referent(referent) {}
};
-struct OptimizationHint {
- // Offset of the first address within the containing InputSection.
- uint64_t offset0;
- // Offset of the other addresses relative to the first one.
- int16_t delta[2];
- uint8_t type;
-};
-
bool validateSymbolRelocation(const Symbol *, const InputSection *,
const Reloc &);
diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index ea8141740413e..ff7998b96ce8a 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -27,7 +27,7 @@ class Symbol;
class Defined;
class DylibSymbol;
class InputSection;
-class ConcatInputSection;
+class ObjFile;
class TargetInfo {
public:
@@ -97,8 +97,7 @@ class TargetInfo {
llvm_unreachable("Unsupported architecture for dtrace symbols");
}
- virtual void applyOptimizationHints(uint8_t *buf,
- const ConcatInputSection *) const {};
+ virtual void applyOptimizationHints(uint8_t *, const ObjFile &) const {};
uint32_t magic;
llvm::MachO::CPUType cpuType;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 44c34db00b1fd..a8ae554a6246f 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -60,6 +60,7 @@ class Writer {
void openFile();
void writeSections();
+ void applyOptimizationHints();
void writeUuid();
void writeCodeSignature();
void writeOutputFile();
@@ -1072,6 +1073,18 @@ void Writer::writeSections() {
});
}
+void Writer::applyOptimizationHints() {
+ if (config->arch() != AK_arm64 || config->ignoreOptimizationHints)
+ return;
+
+ uint8_t *buf = buffer->getBufferStart();
+ TimeTraceScope timeScope("Apply linker optimization hints");
+ parallelForEach(inputFiles, [buf](const InputFile *file) {
+ if (const auto *objFile = dyn_cast<ObjFile>(file))
+ target->applyOptimizationHints(buf, *objFile);
+ });
+}
+
// In order to utilize multiple cores, we first split the buffer into chunks,
// compute a hash for each chunk, and then compute a hash value of the hash
// values.
@@ -1114,6 +1127,7 @@ void Writer::writeOutputFile() {
if (errorCount())
return;
writeSections();
+ applyOptimizationHints();
writeUuid();
writeCodeSignature();
diff --git a/lld/test/MachO/invalid/invalid-loh.s b/lld/test/MachO/invalid/invalid-loh.s
index 19ed52866948a..9bf6b012709b8 100644
--- a/lld/test/MachO/invalid/invalid-loh.s
+++ b/lld/test/MachO/invalid/invalid-loh.s
@@ -1,15 +1,10 @@
# REQUIRES: aarch64
-# RUN: rm -rf %t; split-file %s %t
-# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/section.s -o %t/section.o
-# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/far.s -o %t/far.o
-# RUN: not %lld -arch arm64 %t/section.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=SECTION
-# RUN: not %lld -arch arm64 %t/far.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=FAR
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+# RUN: not %lld -arch arm64 %t.o -o /dev/null 2>&1 | FileCheck %s
-# SECTION: error: Linker optimization hint spans multiple sections
-# FAR: error: Linker optimization hint at offset 0 has addresses too far apart
+# CHECK: error: linker optimization hint spans multiple sections
-#--- section.s
.globl _main
_main:
L1:
@@ -23,17 +18,3 @@ _target:
.loh AdrpAdd L1, L2
.subsections_via_symbols
-
-#--- far.s
-.globl _main
-_main:
-L1:
- adrp x0, _target@PAGE
- .zero 0x8000
-L2:
- add x0, x0, _target@PAGEOFF
-
-_target:
-
-.loh AdrpAdd L1, L2
-.subsections_via_symbols
diff --git a/lld/test/MachO/loh-adrp-adrp.s b/lld/test/MachO/loh-adrp-adrp.s
index 05abc8ab1961d..55d6a614f374e 100644
--- a/lld/test/MachO/loh-adrp-adrp.s
+++ b/lld/test/MachO/loh-adrp-adrp.s
@@ -17,6 +17,11 @@
## Not an adrp instruction (invalid)
# CHECK-NEXT: nop
# CHECK-NEXT: adrp x4
+## Other relaxations take precedence over AdrpAdrp
+# CHECK-NEXT: adr x6
+# CHECK-NEXT: nop
+# CHECK-NEXT: adr x6
+# CHECK-NEXT: nop
.text
.align 2
@@ -39,6 +44,14 @@ L7:
nop
L8:
adrp x4, _baz@PAGE
+L9:
+ adrp x5, _foo@PAGE
+L10:
+ add x6, x5, _foo@PAGEOFF
+L11:
+ adrp x5, _bar@PAGE
+L12:
+ add x6, x5, _bar@PAGEOFF
.data
.align 12
@@ -54,3 +67,6 @@ _baz:
.loh AdrpAdrp L3, L4
.loh AdrpAdrp L5, L6
.loh AdrpAdrp L7, L8
+.loh AdrpAdrp L9, L11
+.loh AdrpAdd L9, L10
+.loh AdrpAdd L11, L12
More information about the llvm-commits
mailing list