[lld] [LLD][MachO][NFC] Refactor LOH code (PR #141153)
Ellis Hoag via llvm-commits
llvm-commits at lists.llvm.org
Thu May 22 15:33:42 PDT 2025
https://github.com/ellishg created https://github.com/llvm/llvm-project/pull/141153
In `applyAdrpAddLdr()` we make a transformation that is identical to the one in `applyAdrpAdd()`, so let's reuse that code. Also refactor `forEachHint()` to use more `ArrayRef` and move around some lines for consistency.
>From 8777abe746c699ac20496249207b7db44ecac4c0 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Thu, 22 May 2025 15:27:55 -0700
Subject: [PATCH] [LLD][MachO] Refactor LOH code
---
lld/MachO/Arch/ARM64.cpp | 73 +++++++++++++++-------------------------
1 file changed, 28 insertions(+), 45 deletions(-)
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 849b309edeb26..c882e106e5b07 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -15,7 +15,6 @@
#include "lld/Common/ErrorHandler.h"
#include "mach-o/compact_unwind_encoding.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
@@ -393,25 +392,26 @@ static void writeImmediateLdr(void *loc, const Ldr &ldr) {
// ->
// adr xM, _foo
// nop
-static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
+static bool applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2) {
uint32_t ins1 = read32le(buf + offset1);
uint32_t ins2 = read32le(buf + offset2);
Adrp adrp;
Add add;
if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add))
- return;
+ return false;
if (adrp.destRegister != add.srcRegister)
- return;
+ return false;
uint64_t addr1 = isec->getVA() + offset1;
uint64_t referent = pageBits(addr1) + adrp.addend + add.addend;
int64_t delta = referent - addr1;
if (!isValidAdrOffset(delta))
- return;
+ return false;
writeAdr(buf + offset1, add.destRegister, delta);
writeNop(buf + offset2);
+ return true;
}
// Transforms two adrp instructions into a single adrp if their referent
@@ -496,16 +496,12 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2,
uint64_t offset3) {
uint32_t ins1 = read32le(buf + offset1);
- Adrp adrp;
- if (!parseAdrp(ins1, adrp))
- return;
uint32_t ins2 = read32le(buf + offset2);
- Add add;
- if (!parseAdd(ins2, add))
- return;
uint32_t ins3 = read32le(buf + offset3);
+ Adrp adrp;
+ Add add;
Ldr ldr;
- if (!parseLdr(ins3, ldr))
+ if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add) || !parseLdr(ins3, ldr))
return;
if (adrp.destRegister != add.srcRegister)
return;
@@ -528,18 +524,8 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
return;
}
- // Load the target address into a register and load from there indirectly.
- // adr x1, _foo
- // nop
- // ldr x2, [x1, #off]
- int64_t adrOffset = referent - addr1;
- if (isValidAdrOffset(adrOffset)) {
- writeAdr(buf + offset1, ldr.baseRegister, adrOffset);
- // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but
- // not for AdrpLdrGotLdr. Its effect is the same either way.
- writeNop(buf + offset2);
+ if (applyAdrpAdd(buf, isec, offset1, offset2))
return;
- }
// Move the target's page offset into the ldr's immediate offset.
// adrp x0, _foo at PAGE
@@ -575,12 +561,10 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
// ldr x3, [x2, #off]
uint32_t ins1 = read32le(buf + offset1);
- Adrp adrp;
- if (!parseAdrp(ins1, adrp))
- return;
uint32_t ins3 = read32le(buf + offset3);
+ Adrp adrp;
Ldr ldr3;
- if (!parseLdr(ins3, ldr3))
+ if (!parseAdrp(ins1, adrp) || !parseLdr(ins3, ldr3))
return;
if (ldr2.baseRegister != adrp.destRegister)
@@ -607,33 +591,32 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
}
}
-static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) {
- unsigned int n = 0;
- uint64_t value = decodeULEB128(ptr, &n, end);
- ptr += n;
- return value;
-}
-
template <typename Callback>
static void forEachHint(ArrayRef<uint8_t> data, Callback callback) {
std::array<uint64_t, 3> args;
- for (const uint8_t *p = data.begin(), *end = data.end(); p < end;) {
- uint64_t type = readValue(p, end);
+ auto readNext = [&]() -> uint64_t {
+ unsigned int n = 0;
+ uint64_t value = decodeULEB128(data.data(), &n, data.end());
+ data = data.drop_front(n);
+ return value;
+ };
+
+ while (!data.empty()) {
+ uint64_t type = readNext();
if (type == 0)
break;
- uint64_t argCount = readValue(p, end);
+ uint64_t argCount = readNext();
+ for (unsigned i = 0; i < argCount; ++i) {
+ uint64_t arg = readNext();
+ if (i < 3)
+ args[i] = arg;
+ }
// All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
- if (argCount > 3) {
- for (unsigned i = 0; i < argCount; ++i)
- readValue(p, end);
+ if (argCount > 3)
continue;
- }
-
- for (unsigned i = 0; i < argCount; ++i)
- args[i] = readValue(p, end);
- callback(type, ArrayRef<uint64_t>(args.data(), argCount));
+ callback(type, ArrayRef(args.data(), argCount));
}
}
More information about the llvm-commits
mailing list