[lld] [LLD][MachO][NFC] Refactor LOH code (PR #141153)

Ellis Hoag via llvm-commits llvm-commits at lists.llvm.org
Thu May 22 15:33:42 PDT 2025


https://github.com/ellishg created https://github.com/llvm/llvm-project/pull/141153

In `applyAdrpAddLdr()` we make a transformation that is identical to the one in `applyAdrpAdd()`, so lets reuse that code. Also refactor `forEachHint()` to use more `ArrayRef` and move around some lines for consistancy.

>From 8777abe746c699ac20496249207b7db44ecac4c0 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Thu, 22 May 2025 15:27:55 -0700
Subject: [PATCH] [LLD][MachO] Refactor LOH code

---
 lld/MachO/Arch/ARM64.cpp | 73 +++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 45 deletions(-)

diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 849b309edeb26..c882e106e5b07 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -15,7 +15,6 @@
 #include "lld/Common/ErrorHandler.h"
 #include "mach-o/compact_unwind_encoding.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/LEB128.h"
@@ -393,25 +392,26 @@ static void writeImmediateLdr(void *loc, const Ldr &ldr) {
 // ->
 //   adr  xM, _foo
 //   nop
-static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
+static bool applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
                          uint64_t offset1, uint64_t offset2) {
   uint32_t ins1 = read32le(buf + offset1);
   uint32_t ins2 = read32le(buf + offset2);
   Adrp adrp;
   Add add;
   if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add))
-    return;
+    return false;
   if (adrp.destRegister != add.srcRegister)
-    return;
+    return false;
 
   uint64_t addr1 = isec->getVA() + offset1;
   uint64_t referent = pageBits(addr1) + adrp.addend + add.addend;
   int64_t delta = referent - addr1;
   if (!isValidAdrOffset(delta))
-    return;
+    return false;
 
   writeAdr(buf + offset1, add.destRegister, delta);
   writeNop(buf + offset2);
+  return true;
 }
 
 // Transforms two adrp instructions into a single adrp if their referent
@@ -496,16 +496,12 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
                             uint64_t offset1, uint64_t offset2,
                             uint64_t offset3) {
   uint32_t ins1 = read32le(buf + offset1);
-  Adrp adrp;
-  if (!parseAdrp(ins1, adrp))
-    return;
   uint32_t ins2 = read32le(buf + offset2);
-  Add add;
-  if (!parseAdd(ins2, add))
-    return;
   uint32_t ins3 = read32le(buf + offset3);
+  Adrp adrp;
+  Add add;
   Ldr ldr;
-  if (!parseLdr(ins3, ldr))
+  if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add) || !parseLdr(ins3, ldr))
     return;
   if (adrp.destRegister != add.srcRegister)
     return;
@@ -528,18 +524,8 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
     return;
   }
 
-  // Load the target address into a register and load from there indirectly.
-  //   adr x1, _foo
-  //   nop
-  //   ldr x2, [x1, #off]
-  int64_t adrOffset = referent - addr1;
-  if (isValidAdrOffset(adrOffset)) {
-    writeAdr(buf + offset1, ldr.baseRegister, adrOffset);
-    // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but
-    // not for AdrpLdrGotLdr. Its effect is the same either way.
-    writeNop(buf + offset2);
+  if (applyAdrpAdd(buf, isec, offset1, offset2))
     return;
-  }
 
   // Move the target's page offset into the ldr's immediate offset.
   //   adrp x0, _foo at PAGE
@@ -575,12 +561,10 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
     // ldr  x3, [x2, #off]
 
     uint32_t ins1 = read32le(buf + offset1);
-    Adrp adrp;
-    if (!parseAdrp(ins1, adrp))
-      return;
     uint32_t ins3 = read32le(buf + offset3);
+    Adrp adrp;
     Ldr ldr3;
-    if (!parseLdr(ins3, ldr3))
+    if (!parseAdrp(ins1, adrp) || !parseLdr(ins3, ldr3))
       return;
 
     if (ldr2.baseRegister != adrp.destRegister)
@@ -607,33 +591,32 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
   }
 }
 
-static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) {
-  unsigned int n = 0;
-  uint64_t value = decodeULEB128(ptr, &n, end);
-  ptr += n;
-  return value;
-}
-
 template <typename Callback>
 static void forEachHint(ArrayRef<uint8_t> data, Callback callback) {
   std::array<uint64_t, 3> args;
 
-  for (const uint8_t *p = data.begin(), *end = data.end(); p < end;) {
-    uint64_t type = readValue(p, end);
+  auto readNext = [&]() -> uint64_t {
+    unsigned int n = 0;
+    uint64_t value = decodeULEB128(data.data(), &n, data.end());
+    data = data.drop_front(n);
+    return value;
+  };
+
+  while (!data.empty()) {
+    uint64_t type = readNext();
     if (type == 0)
       break;
 
-    uint64_t argCount = readValue(p, end);
+    uint64_t argCount = readNext();
+    for (unsigned i = 0; i < argCount; ++i) {
+      uint64_t arg = readNext();
+      if (i < 3)
+        args[i] = arg;
+    }
     // All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
-    if (argCount > 3) {
-      for (unsigned i = 0; i < argCount; ++i)
-        readValue(p, end);
+    if (argCount > 3)
       continue;
-    }
-
-    for (unsigned i = 0; i < argCount; ++i)
-      args[i] = readValue(p, end);
-    callback(type, ArrayRef<uint64_t>(args.data(), argCount));
+    callback(type, ArrayRef(args.data(), argCount));
   }
 }
 



More information about the llvm-commits mailing list