[lld] 002ca63 - [ELF] Pass Ctx & to (read|write)(16|64)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 13 10:47:33 PDT 2024


Author: Fangrui Song
Date: 2024-10-13T10:47:18-07:00
New Revision: 002ca63b3f1cb660b831a78d29abdfe33eaffbb4

URL: https://github.com/llvm/llvm-project/commit/002ca63b3f1cb660b831a78d29abdfe33eaffbb4
DIFF: https://github.com/llvm/llvm-project/commit/002ca63b3f1cb660b831a78d29abdfe33eaffbb4.diff

LOG: [ELF] Pass Ctx & to (read|write)(16|64)

Added: 
    

Modified: 
    lld/ELF/Arch/AArch64.cpp
    lld/ELF/Arch/AMDGPU.cpp
    lld/ELF/Arch/ARM.cpp
    lld/ELF/Arch/Mips.cpp
    lld/ELF/Arch/PPC.cpp
    lld/ELF/Arch/PPC64.cpp
    lld/ELF/OutputSections.cpp
    lld/ELF/SyntheticSections.cpp
    lld/ELF/Target.h
    lld/ELF/Thunks.cpp

Removed: 
    


################################################################################
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index e1b06240493635..ae03fde21c7993 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -255,14 +255,14 @@ RelType AArch64::getDynRel(RelType type) const {
 int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
   switch (type) {
   case R_AARCH64_TLSDESC:
-    return read64(buf + 8);
+    return read64(ctx, buf + 8);
   case R_AARCH64_NONE:
   case R_AARCH64_GLOB_DAT:
   case R_AARCH64_JUMP_SLOT:
     return 0;
   case R_AARCH64_ABS16:
   case R_AARCH64_PREL16:
-    return SignExtend64<16>(read16(buf));
+    return SignExtend64<16>(read16(ctx, buf));
   case R_AARCH64_ABS32:
   case R_AARCH64_PREL32:
     return SignExtend64<32>(read32(ctx, buf));
@@ -271,7 +271,7 @@ int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_AARCH64_RELATIVE:
   case R_AARCH64_IRELATIVE:
   case R_AARCH64_TLS_TPREL64:
-    return read64(buf);
+    return read64(ctx, buf);
 
     // The following relocation types all point at instructions, and
     // relocate an immediate field in the instruction.
@@ -355,12 +355,12 @@ int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
 }
 
 void AArch64::writeGotPlt(uint8_t *buf, const Symbol &) const {
-  write64(buf, ctx.in.plt->getVA());
+  write64(ctx, buf, ctx.in.plt->getVA());
 }
 
 void AArch64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
   if (ctx.arg.writeAddends)
-    write64(buf, s.getVA());
+    write64(ctx, buf, s.getVA());
 }
 
 void AArch64::writePltHeader(uint8_t *buf) const {
@@ -485,7 +485,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
   case R_AARCH64_ABS16:
   case R_AARCH64_PREL16:
     checkIntUInt(loc, val, 16, rel);
-    write16(loc, val);
+    write16(ctx, loc, val);
     break;
   case R_AARCH64_ABS32:
   case R_AARCH64_PREL32:
@@ -508,12 +508,12 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
     if (rel.sym && rel.sym->isTagged() &&
         (rel.addend < 0 ||
          rel.addend >= static_cast<int64_t>(rel.sym->getSize())))
-      write64(loc, -rel.addend);
+      write64(ctx, loc, -rel.addend);
     else
-      write64(loc, val);
+      write64(ctx, loc, val);
     break;
   case R_AARCH64_PREL64:
-    write64(loc, val);
+    write64(ctx, loc, val);
     break;
   case R_AARCH64_AUTH_ABS64:
     // If val is wider than 32 bits, the relocation must have been moved from
@@ -662,7 +662,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
     break;
   case R_AARCH64_TLSDESC:
     // For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
-    write64(loc + 8, val);
+    write64(ctx, loc + 8, val);
     break;
   default:
     llvm_unreachable("unknown relocation");

diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp
index 130da19d0e210d..ce37d0adc5fbb8 100644
--- a/lld/ELF/Arch/AMDGPU.cpp
+++ b/lld/ELF/Arch/AMDGPU.cpp
@@ -211,7 +211,7 @@ int64_t AMDGPU::getImplicitAddend(const uint8_t *buf, RelType type) const {
     return 0;
   case R_AMDGPU_ABS64:
   case R_AMDGPU_RELATIVE64:
-    return read64(buf);
+    return read64(ctx, buf);
   default:
     internalLinkerError(getErrorLoc(ctx, buf),
                         "cannot read addend for relocation " + toString(type));

diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index d6fbea13e8d725..0b09a083ce2b0d 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -255,14 +255,14 @@ void ARM::writePltHeader(uint8_t *buf) const {
     //
     uint64_t offset = ctx.in.gotPlt->getVA() - ctx.in.plt->getVA() - 16;
     assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
-    write16(buf + 0, 0xb500);
+    write16(ctx, buf + 0, 0xb500);
     // Split into two halves to support endianness correctly.
-    write16(buf + 2, 0xf8df);
-    write16(buf + 4, 0xe008);
-    write16(buf + 6, 0x44fe);
+    write16(ctx, buf + 2, 0xf8df);
+    write16(ctx, buf + 4, 0xe008);
+    write16(ctx, buf + 6, 0x44fe);
     // Split into two halves to support endianness correctly.
-    write16(buf + 8, 0xf85e);
-    write16(buf + 10, 0xff08);
+    write16(ctx, buf + 8, 0xf85e);
+    write16(ctx, buf + 10, 0xff08);
     write32(ctx, buf + 12, offset);
 
     memcpy(buf + 16, trapInstr.data(), 4);  // Pad to 32-byte boundary
@@ -361,17 +361,17 @@ void ARM::writePlt(uint8_t *buf, const Symbol &sym,
     // where ip = r12 = 0xc
 
     // movw ip, #<lower 16 bits>
-    write16(buf + 2, 0x0c00); // use `ip`
+    write16(ctx, buf + 2, 0x0c00); // use `ip`
     relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset);
 
     // movt ip, #<upper 16 bits>
-    write16(buf + 6, 0x0c00); // use `ip`
+    write16(ctx, buf + 6, 0x0c00); // use `ip`
     relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset);
 
-    write16(buf + 8, 0x44fc);       // add ip, pc
-    write16(buf + 10, 0xf8dc);      // ldr.w   pc, [ip] (bottom half)
-    write16(buf + 12, 0xf000);      // ldr.w   pc, [ip] (upper half)
-    write16(buf + 14, 0xe7fc);      // Branch to previous instruction
+    write16(ctx, buf + 8, 0x44fc);  // add ip, pc
+    write16(ctx, buf + 10, 0xf8dc); // ldr.w   pc, [ip] (bottom half)
+    write16(ctx, buf + 12, 0xf000); // ldr.w   pc, [ip] (upper half)
+    write16(ctx, buf + 14, 0xe7fc); // Branch to previous instruction
   }
 }
 
@@ -662,25 +662,25 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_ARM_THM_JUMP8:
     // We do a 9 bit check because val is right-shifted by 1 bit.
     checkInt(loc, val, 9, rel);
-    write16(loc, (read32(ctx, loc) & 0xff00) | ((val >> 1) & 0x00ff));
+    write16(ctx, loc, (read32(ctx, loc) & 0xff00) | ((val >> 1) & 0x00ff));
     break;
   case R_ARM_THM_JUMP11:
     // We do a 12 bit check because val is right-shifted by 1 bit.
     checkInt(loc, val, 12, rel);
-    write16(loc, (read32(ctx, loc) & 0xf800) | ((val >> 1) & 0x07ff));
+    write16(ctx, loc, (read32(ctx, loc) & 0xf800) | ((val >> 1) & 0x07ff));
     break;
   case R_ARM_THM_JUMP19:
     // Encoding T3: Val = S:J2:J1:imm6:imm11:0
     checkInt(loc, val, 21, rel);
-    write16(loc,
-              (read16(loc) & 0xfbc0) |   // opcode cond
-                  ((val >> 10) & 0x0400) | // S
-                  ((val >> 12) & 0x003f)); // imm6
-    write16(loc + 2,
-              0x8000 |                    // opcode
-                  ((val >> 8) & 0x0800) | // J2
-                  ((val >> 5) & 0x2000) | // J1
-                  ((val >> 1) & 0x07ff)); // imm11
+    write16(ctx, loc,
+            (read16(ctx, loc) & 0xfbc0) | // opcode cond
+                ((val >> 10) & 0x0400) |  // S
+                ((val >> 12) & 0x003f));  // imm6
+    write16(ctx, loc + 2,
+            0x8000 |                    // opcode
+                ((val >> 8) & 0x0800) | // J2
+                ((val >> 5) & 0x2000) | // J1
+                ((val >> 1) & 0x07ff)); // imm11
     break;
   case R_ARM_THM_CALL: {
     // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type
@@ -691,7 +691,7 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
     bool bit0Thumb = val & 1;
     bool useThumb = bit0Thumb || useThumbPLTs(ctx);
-    bool isBlx = (read16(loc + 2) & 0x1000) == 0;
+    bool isBlx = (read16(ctx, loc + 2) & 0x1000) == 0;
     // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
     // even when type not STT_FUNC.
     if (!rel.sym->isFunc() && !rel.sym->isInPlt(ctx) && isBlx == useThumb)
@@ -701,21 +701,21 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
       // the BLX instruction may only be two byte aligned. This must be done
       // before overflow check.
       val = alignTo(val, 4);
-      write16(loc + 2, read16(loc + 2) & ~0x1000);
+      write16(ctx, loc + 2, read16(ctx, loc + 2) & ~0x1000);
     } else {
-      write16(loc + 2, (read16(loc + 2) & ~0x1000) | 1 << 12);
+      write16(ctx, loc + 2, (read16(ctx, loc + 2) & ~0x1000) | 1 << 12);
     }
     if (!ctx.arg.armJ1J2BranchEncoding) {
       // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
      // different encoding rules and range due to J1 and J2 always being 1.
       checkInt(loc, val, 23, rel);
-      write16(loc,
-                0xf000 |                     // opcode
-                    ((val >> 12) & 0x07ff)); // imm11
-      write16(loc + 2,
-                (read16(loc + 2) & 0xd000) | // opcode
-                    0x2800 |                   // J1 == J2 == 1
-                    ((val >> 1) & 0x07ff));    // imm11
+      write16(ctx, loc,
+              0xf000 |                     // opcode
+                  ((val >> 12) & 0x07ff)); // imm11
+      write16(ctx, loc + 2,
+              (read16(ctx, loc + 2) & 0xd000) | // opcode
+                  0x2800 |                      // J1 == J2 == 1
+                  ((val >> 1) & 0x07ff));       // imm11
       break;
     }
   }
@@ -724,15 +724,15 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_ARM_THM_JUMP24:
     // Encoding B  T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0
     checkInt(loc, val, 25, rel);
-    write16(loc,
-              0xf000 |                     // opcode
-                  ((val >> 14) & 0x0400) | // S
-                  ((val >> 12) & 0x03ff)); // imm10
-    write16(loc + 2,
-              (read16(loc + 2) & 0xd000) |                  // opcode
-                  (((~(val >> 10)) ^ (val >> 11)) & 0x2000) | // J1
-                  (((~(val >> 11)) ^ (val >> 13)) & 0x0800) | // J2
-                  ((val >> 1) & 0x07ff));                     // imm11
+    write16(ctx, loc,
+            0xf000 |                     // opcode
+                ((val >> 14) & 0x0400) | // S
+                ((val >> 12) & 0x03ff)); // imm10
+    write16(ctx, loc + 2,
+            (read16(ctx, loc + 2) & 0xd000) |               // opcode
+                (((~(val >> 10)) ^ (val >> 11)) & 0x2000) | // J1
+                (((~(val >> 11)) ^ (val >> 13)) & 0x0800) | // J2
+                ((val >> 1) & 0x07ff));                     // imm11
     break;
   case R_ARM_MOVW_ABS_NC:
   case R_ARM_MOVW_PREL_NC:
@@ -753,40 +753,40 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_ARM_THM_MOVT_BREL:
     // Encoding T1: A = imm4:i:imm3:imm8
 
-    write16(loc,
+    write16(ctx, loc,
             0xf2c0 |                     // opcode
                 ((val >> 17) & 0x0400) | // i
                 ((val >> 28) & 0x000f)); // imm4
 
-    write16(loc + 2,
-              (read16(loc + 2) & 0x8f00) | // opcode
-                  ((val >> 12) & 0x7000) |   // imm3
-                  ((val >> 16) & 0x00ff));   // imm8
+    write16(ctx, loc + 2,
+            (read16(ctx, loc + 2) & 0x8f00) | // opcode
+                ((val >> 12) & 0x7000) |      // imm3
+                ((val >> 16) & 0x00ff));      // imm8
     break;
   case R_ARM_THM_MOVW_ABS_NC:
   case R_ARM_THM_MOVW_PREL_NC:
   case R_ARM_THM_MOVW_BREL_NC:
     // Encoding T3: A = imm4:i:imm3:imm8
-    write16(loc,
-              0xf240 |                     // opcode
-                  ((val >> 1) & 0x0400) |  // i
-                  ((val >> 12) & 0x000f)); // imm4
-    write16(loc + 2,
-              (read16(loc + 2) & 0x8f00) | // opcode
-                  ((val << 4) & 0x7000) |    // imm3
-                  (val & 0x00ff));           // imm8
+    write16(ctx, loc,
+            0xf240 |                     // opcode
+                ((val >> 1) & 0x0400) |  // i
+                ((val >> 12) & 0x000f)); // imm4
+    write16(ctx, loc + 2,
+            (read16(ctx, loc + 2) & 0x8f00) | // opcode
+                ((val << 4) & 0x7000) |       // imm3
+                (val & 0x00ff));              // imm8
     break;
   case R_ARM_THM_ALU_ABS_G3:
-    write16(loc, (read16(loc) &~ 0x00ff) | ((val >> 24) & 0x00ff));
+    write16(ctx, loc, (read16(ctx, loc) & ~0x00ff) | ((val >> 24) & 0x00ff));
     break;
   case R_ARM_THM_ALU_ABS_G2_NC:
-    write16(loc, (read16(loc) &~ 0x00ff) | ((val >> 16) & 0x00ff));
+    write16(ctx, loc, (read16(ctx, loc) & ~0x00ff) | ((val >> 16) & 0x00ff));
     break;
   case R_ARM_THM_ALU_ABS_G1_NC:
-    write16(loc, (read16(loc) &~ 0x00ff) | ((val >> 8) & 0x00ff));
+    write16(ctx, loc, (read16(ctx, loc) & ~0x00ff) | ((val >> 8) & 0x00ff));
     break;
   case R_ARM_THM_ALU_ABS_G0_NC:
-    write16(loc, (read16(loc) &~ 0x00ff) | (val & 0x00ff));
+    write16(ctx, loc, (read16(ctx, loc) & ~0x00ff) | (val & 0x00ff));
     break;
   case R_ARM_ALU_PC_G0:
     encodeAluGroup(loc, rel, val, 0, true);
@@ -830,9 +830,10 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
       sub = 0x00a0;
     }
     checkUInt(loc, imm, 12, rel);
-    write16(loc, (read16(loc) & 0xfb0f) | sub | (imm & 0x800) >> 1);
-    write16(loc + 2,
-              (read16(loc + 2) & 0x8f00) | (imm & 0x700) << 4 | (imm & 0xff));
+    write16(ctx, loc, (read16(ctx, loc) & 0xfb0f) | sub | (imm & 0x800) >> 1);
+    write16(ctx, loc + 2,
+            (read16(ctx, loc + 2) & 0x8f00) | (imm & 0x700) << 4 |
+                (imm & 0xff));
     break;
   }
   case R_ARM_THM_PC8:
@@ -844,7 +845,7 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
       val &= ~0x1;
     checkUInt(loc, val, 10, rel);
     checkAlignment(loc, val, 4, rel);
-    write16(loc, (read16(loc) & 0xff00) | (val & 0x3fc) >> 2);
+    write16(ctx, loc, (read16(ctx, loc) & 0xff00) | (val & 0x3fc) >> 2);
     break;
   case R_ARM_THM_PC12: {
     // LDR (literal) encoding T2, add = (U == '1') imm12
@@ -861,8 +862,8 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
       u = 0;
     }
     checkUInt(loc, imm12, 12, rel);
-    write16(loc, read16(loc) | u);
-    write16(loc + 2, (read16(loc + 2) & 0xf000) | imm12);
+    write16(ctx, loc, read16(ctx, loc) | u);
+    write16(ctx, loc + 2, (read16(ctx, loc + 2) & 0xf000) | imm12);
     break;
   }
   default:
@@ -905,13 +906,13 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_ARM_PLT32:
     return SignExtend64<26>(read32(ctx, buf) << 2);
   case R_ARM_THM_JUMP8:
-    return SignExtend64<9>(read16(buf) << 1);
+    return SignExtend64<9>(read16(ctx, buf) << 1);
   case R_ARM_THM_JUMP11:
-    return SignExtend64<12>(read16(buf) << 1);
+    return SignExtend64<12>(read16(ctx, buf) << 1);
   case R_ARM_THM_JUMP19: {
     // Encoding T3: A = S:J2:J1:imm10:imm6:0
-    uint16_t hi = read16(buf);
-    uint16_t lo = read16(buf + 2);
+    uint16_t hi = read16(ctx, buf);
+    uint16_t lo = read16(ctx, buf + 2);
     return SignExtend64<20>(((hi & 0x0400) << 10) | // S
                             ((lo & 0x0800) << 8) |  // J2
                             ((lo & 0x2000) << 5) |  // J1
@@ -922,8 +923,8 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
     if (!ctx.arg.armJ1J2BranchEncoding) {
       // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
      // different encoding rules and range due to J1 and J2 always being 1.
-      uint16_t hi = read16(buf);
-      uint16_t lo = read16(buf + 2);
+      uint16_t hi = read16(ctx, buf);
+      uint16_t lo = read16(ctx, buf + 2);
       return SignExtend64<22>(((hi & 0x7ff) << 12) | // imm11
                               ((lo & 0x7ff) << 1));  // imm11:0
       break;
@@ -932,8 +933,8 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_ARM_THM_JUMP24: {
     // Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0
     // I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S)
-    uint16_t hi = read16(buf);
-    uint16_t lo = read16(buf + 2);
+    uint16_t hi = read16(ctx, buf);
+    uint16_t lo = read16(ctx, buf + 2);
     return SignExtend64<24>(((hi & 0x0400) << 14) |                    // S
                             (~((lo ^ (hi << 3)) << 10) & 0x00800000) | // I1
                             (~((lo ^ (hi << 1)) << 11) & 0x00400000) | // I2
@@ -958,8 +959,8 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_ARM_THM_MOVW_BREL_NC:
   case R_ARM_THM_MOVT_BREL: {
     // Encoding T3: A = imm4:i:imm3:imm8
-    uint16_t hi = read16(buf);
-    uint16_t lo = read16(buf + 2);
+    uint16_t hi = read16(ctx, buf);
+    uint16_t lo = read16(ctx, buf + 2);
     return SignExtend64<16>(((hi & 0x000f) << 12) | // imm4
                             ((hi & 0x0400) << 1) |  // i
                             ((lo & 0x7000) >> 4) |  // imm3
@@ -969,7 +970,7 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_ARM_THM_ALU_ABS_G1_NC:
   case R_ARM_THM_ALU_ABS_G2_NC:
   case R_ARM_THM_ALU_ABS_G3:
-    return read16(buf) & 0xff;
+    return read16(ctx, buf) & 0xff;
   case R_ARM_ALU_PC_G0:
   case R_ARM_ALU_PC_G0_NC:
   case R_ARM_ALU_PC_G1:
@@ -1006,8 +1007,8 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
     // Thumb2 ADR, which is an alias for a sub or add instruction with an
     // unsigned immediate.
     // ADR encoding T2 (sub), T3 (add) i:imm3:imm8
-    uint16_t hi = read16(buf);
-    uint16_t lo = read16(buf + 2);
+    uint16_t hi = read16(ctx, buf);
+    uint16_t lo = read16(ctx, buf + 2);
     uint64_t imm = (hi & 0x0400) << 1 | // i
                    (lo & 0x7000) >> 4 | // imm3
                    (lo & 0x00ff);       // imm8
@@ -1019,11 +1020,11 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
     // From ELF for the ARM Architecture the initial signed addend is formed
     // from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) – 4)
     // this trick permits the PC bias of -4 to be encoded using imm8 = 0xff
-    return ((((read16(buf) & 0xff) << 2) + 4) & 0x3ff) - 4;
+    return ((((read16(ctx, buf) & 0xff) << 2) + 4) & 0x3ff) - 4;
   case R_ARM_THM_PC12: {
     // LDR (literal) encoding T2, add = (U == '1') imm12
-    bool u = read16(buf) & 0x0080;
-    uint64_t imm12 = read16(buf + 2) & 0x0fff;
+    bool u = read16(ctx, buf) & 0x0080;
+    uint64_t imm12 = read16(ctx, buf + 2) & 0x0fff;
     return u ? imm12 : -imm12;
   }
   case R_ARM_NONE:
@@ -1099,7 +1100,7 @@ static void toLittleEndianInstructions(uint8_t *buf, uint64_t start,
 
   if (curState == CodeState::Thumb)
     for (uint64_t i = start; i < end; i += width)
-      write16le(buf + i, read16(buf + i));
+      write16le(buf + i, read16(ctx, buf + i));
 }
 
 // Arm BE8 big endian format requires instructions to be little endian, with
@@ -1393,10 +1394,10 @@ void ArmCmseSGSection::addSGVeneer(Symbol *acleSeSym, Symbol *sym) {
 void ArmCmseSGSection::writeTo(uint8_t *buf) {
   for (ArmCmseSGVeneer *s : sgVeneers) {
     uint8_t *p = buf + s->offset;
-    write16(p + 0, 0xe97f); // SG
-    write16(p + 2, 0xe97f);
-    write16(p + 4, 0xf000); // B.W S
-    write16(p + 6, 0xb000);
+    write16(ctx, p + 0, 0xe97f); // SG
+    write16(ctx, p + 2, 0xe97f);
+    write16(ctx, p + 4, 0xf000); // B.W S
+    write16(ctx, p + 6, 0xb000);
     ctx.target->relocateNoSym(p + 4, R_ARM_THM_JUMP24,
                               s->acleSeSym->getVA() -
                                   (getVA() + s->offset + s->size));

diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp
index 8736b2cc734410..d5b63497625cba 100644
--- a/lld/ELF/Arch/Mips.cpp
+++ b/lld/ELF/Arch/Mips.cpp
@@ -249,10 +249,10 @@ static void writeShuffleValue(uint8_t *loc, uint64_t v, uint8_t bitsSize,
 template <endianness E>
 static void writeMicroRelocation16(uint8_t *loc, uint64_t v, uint8_t bitsSize,
                                    uint8_t shift) {
-  uint16_t instr = read16(loc);
+  uint16_t instr = read16(ctx, loc);
   uint16_t mask = 0xffff >> (16 - bitsSize);
   uint16_t data = (instr & ~mask) | ((v >> shift) & mask);
-  write16(loc, data);
+  write16(ctx, loc, data);
 }
 
 template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *buf) const {
@@ -262,22 +262,23 @@ template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *buf) const {
     // Overwrite trap instructions written by Writer::writeTrapInstr.
     memset(buf, 0, pltHeaderSize);
 
-    write16(buf, isMipsR6(ctx) ? 0x7860 : 0x7980); // addiupc v1, (GOTPLT) - .
-    write16(buf + 4, 0xff23);    // lw      $25, 0($3)
-    write16(buf + 8, 0x0535);    // subu16  $2,  $2, $3
-    write16(buf + 10, 0x2525);   // srl16   $2,  $2, 2
-    write16(buf + 12, 0x3302);   // addiu   $24, $2, -2
-    write16(buf + 14, 0xfffe);
-    write16(buf + 16, 0x0dff);   // move    $15, $31
+    write16(ctx, buf,
+            isMipsR6(ctx) ? 0x7860 : 0x7980); // addiupc v1, (GOTPLT) - .
+    write16(ctx, buf + 4, 0xff23);            // lw      $25, 0($3)
+    write16(ctx, buf + 8, 0x0535);            // subu16  $2,  $2, $3
+    write16(ctx, buf + 10, 0x2525);           // srl16   $2,  $2, 2
+    write16(ctx, buf + 12, 0x3302);           // addiu   $24, $2, -2
+    write16(ctx, buf + 14, 0xfffe);
+    write16(ctx, buf + 16, 0x0dff); // move    $15, $31
     if (isMipsR6(ctx)) {
-      write16(buf + 18, 0x0f83); // move    $28, $3
-      write16(buf + 20, 0x472b); // jalrc   $25
-      write16(buf + 22, 0x0c00); // nop
+      write16(ctx, buf + 18, 0x0f83); // move    $28, $3
+      write16(ctx, buf + 20, 0x472b); // jalrc   $25
+      write16(ctx, buf + 22, 0x0c00); // nop
       relocateNoSym(buf, R_MICROMIPS_PC19_S2, gotPlt - plt);
     } else {
-      write16(buf + 18, 0x45f9); // jalrc   $25
-      write16(buf + 20, 0x0f83); // move    $28, $3
-      write16(buf + 22, 0x0c00); // nop
+      write16(ctx, buf + 18, 0x45f9); // jalrc   $25
+      write16(ctx, buf + 20, 0x0f83); // move    $28, $3
+      write16(ctx, buf + 22, 0x0c00); // nop
       relocateNoSym(buf, R_MICROMIPS_PC23_S2, gotPlt - plt);
     }
     return;
@@ -325,16 +326,16 @@ void MIPS<ELFT>::writePlt(uint8_t *buf, const Symbol &sym,
     memset(buf, 0, pltEntrySize);
 
     if (isMipsR6(ctx)) {
-      write16(buf, 0x7840);      // addiupc $2, (GOTPLT) - .
-      write16(buf + 4, 0xff22);  // lw $25, 0($2)
-      write16(buf + 8, 0x0f02);  // move $24, $2
-      write16(buf + 10, 0x4723); // jrc $25 / jr16 $25
+      write16(ctx, buf, 0x7840);      // addiupc $2, (GOTPLT) - .
+      write16(ctx, buf + 4, 0xff22);  // lw $25, 0($2)
+      write16(ctx, buf + 8, 0x0f02);  // move $24, $2
+      write16(ctx, buf + 10, 0x4723); // jrc $25 / jr16 $25
       relocateNoSym(buf, R_MICROMIPS_PC19_S2, gotPltEntryAddr - pltEntryAddr);
     } else {
-      write16(buf, 0x7900);      // addiupc $2, (GOTPLT) - .
-      write16(buf + 4, 0xff22);  // lw $25, 0($2)
-      write16(buf + 8, 0x4599);  // jrc $25 / jr16 $25
-      write16(buf + 10, 0x0f02); // move $24, $2
+      write16(ctx, buf, 0x7900);      // addiupc $2, (GOTPLT) - .
+      write16(ctx, buf + 4, 0xff22);  // lw $25, 0($2)
+      write16(ctx, buf + 8, 0x4599);  // jrc $25 / jr16 $25
+      write16(ctx, buf + 10, 0x0f02); // move $24, $2
       relocateNoSym(buf, R_MICROMIPS_PC23_S2, gotPltEntryAddr - pltEntryAddr);
     }
     return;
@@ -444,9 +445,9 @@ int64_t MIPS<ELFT>::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_MICROMIPS_26_S1:
     return SignExtend64<27>(readShuffle<e>(buf) << 1);
   case R_MICROMIPS_PC7_S1:
-    return SignExtend64<8>(read16(buf) << 1);
+    return SignExtend64<8>(read16(ctx, buf) << 1);
   case R_MICROMIPS_PC10_S1:
-    return SignExtend64<11>(read16(buf) << 1);
+    return SignExtend64<11>(read16(ctx, buf) << 1);
   case R_MICROMIPS_PC16_S1:
     return SignExtend64<17>(readShuffle<e>(buf) << 1);
   case R_MICROMIPS_PC18_S3:
@@ -464,9 +465,9 @@ int64_t MIPS<ELFT>::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_MIPS_TLS_DTPREL64:
   case R_MIPS_TLS_TPREL64:
   case (R_MIPS_64 << 8) | R_MIPS_REL32:
-    return read64(buf);
+    return read64(ctx, buf);
   case R_MIPS_COPY:
-    return ctx.arg.is64 ? read64(buf) : read32(ctx, buf);
+    return ctx.arg.is64 ? read64(ctx, buf) : read32(ctx, buf);
   case R_MIPS_NONE:
   case R_MIPS_JUMP_SLOT:
   case R_MIPS_JALR:
@@ -596,7 +597,7 @@ void MIPS<ELFT>::relocate(uint8_t *loc, const Relocation &rel,
   case R_MIPS_64:
   case R_MIPS_TLS_DTPREL64:
   case R_MIPS_TLS_TPREL64:
-    write64(loc, val);
+    write64(ctx, loc, val);
     break;
   case R_MIPS_26:
     writeValue(loc, val, 26, 2);

diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp
index 79962d06e5553b..e9bd3ecdbdd523 100644
--- a/lld/ELF/Arch/PPC.cpp
+++ b/lld/ELF/Arch/PPC.cpp
@@ -326,7 +326,7 @@ void PPC::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   switch (newType) {
   case R_PPC_ADDR16:
     checkIntUInt(loc, val, 16, rel);
-    write16(loc, val);
+    write16(ctx, loc, val);
     break;
   case R_PPC_GOT16:
   case R_PPC_GOT_TLSGD16:
@@ -334,7 +334,7 @@ void PPC::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_PPC_GOT_TPREL16:
   case R_PPC_TPREL16:
     checkInt(loc, val, 16, rel);
-    write16(loc, val);
+    write16(ctx, loc, val);
     break;
   case R_PPC_ADDR16_HA:
   case R_PPC_DTPREL16_HA:
@@ -343,7 +343,7 @@ void PPC::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_PPC_GOT_TPREL16_HA:
   case R_PPC_REL16_HA:
   case R_PPC_TPREL16_HA:
-    write16(loc, ha(val));
+    write16(ctx, loc, ha(val));
     break;
   case R_PPC_ADDR16_HI:
   case R_PPC_DTPREL16_HI:
@@ -352,7 +352,7 @@ void PPC::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_PPC_GOT_TPREL16_HI:
   case R_PPC_REL16_HI:
   case R_PPC_TPREL16_HI:
-    write16(loc, val >> 16);
+    write16(ctx, loc, val >> 16);
     break;
   case R_PPC_ADDR16_LO:
   case R_PPC_DTPREL16_LO:
@@ -361,7 +361,7 @@ void PPC::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_PPC_GOT_TPREL16_LO:
   case R_PPC_REL16_LO:
   case R_PPC_TPREL16_LO:
-    write16(loc, val);
+    write16(ctx, loc, val);
     break;
   case R_PPC_ADDR32:
   case R_PPC_REL32:

diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index 647954106322e8..f25ff53fccd8eb 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -246,7 +246,7 @@ unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) {
 
 void elf::writePrefixedInst(Ctx &ctx, uint8_t *loc, uint64_t insn) {
   insn = ctx.arg.isLE ? insn << 32 | insn >> 32 : insn;
-  write64(loc, insn);
+  write64(ctx, loc, insn);
 }
 
 static bool addOptional(Ctx &ctx, StringRef name, uint64_t value,
@@ -574,7 +574,7 @@ static uint32_t readFromHalf16(Ctx &ctx, const uint8_t *loc) {
 }
 
 static uint64_t readPrefixedInst(Ctx &ctx, const uint8_t *loc) {
-  uint64_t fullInstr = read64(loc);
+  uint64_t fullInstr = read64(ctx, loc);
   return ctx.arg.isLE ? (fullInstr << 32 | fullInstr >> 32) : fullInstr;
 }
 
@@ -1125,7 +1125,7 @@ int64_t PPC64::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_PPC64_DTPMOD64:
   case R_PPC64_DTPREL64:
   case R_PPC64_TPREL64:
-    return read64(buf);
+    return read64(ctx, buf);
   default:
     internalLinkerError(getErrorLoc(ctx, buf),
                         "cannot read addend for relocation " + toString(type));
@@ -1134,7 +1134,7 @@ int64_t PPC64::getImplicitAddend(const uint8_t *buf, RelType type) const {
 }
 
 void PPC64::writeGotHeader(uint8_t *buf) const {
-  write64(buf, getPPC64TocBase(ctx));
+  write64(ctx, buf, getPPC64TocBase(ctx));
 }
 
 void PPC64::writePltHeader(uint8_t *buf) const {
@@ -1157,7 +1157,7 @@ void PPC64::writePltHeader(uint8_t *buf) const {
   // following instruction ('mflr r11'). Here we store the offset from that
   // instruction  to the first entry in the GotPlt section.
   int64_t gotPltOffset = ctx.in.gotPlt->getVA() - (ctx.in.plt->getVA() + 8);
-  write64(buf + 52, gotPltOffset);
+  write64(ctx, buf + 52, gotPltOffset);
 }
 
 void PPC64::writePlt(uint8_t *buf, const Symbol &sym,
@@ -1269,12 +1269,12 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     checkAlignment(loc, val, 4, rel);
     // Preserve the AA/LK bits in the branch instruction
     uint8_t aalk = loc[3];
-    write16(loc + 2, (aalk & 3) | (val & 0xfffc));
+    write16(ctx, loc + 2, (aalk & 3) | (val & 0xfffc));
     break;
   }
   case R_PPC64_ADDR16:
     checkIntUInt(loc, val, 16, rel);
-    write16(loc, val);
+    write16(ctx, loc, val);
     break;
   case R_PPC64_ADDR32:
     checkIntUInt(loc, val, 32, rel);
@@ -1287,7 +1287,7 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     // DS-form instructions only use bits 30-31.
     uint16_t mask = isDQFormInstruction(readFromHalf16(ctx, loc)) ? 0xf : 0x3;
     checkAlignment(loc, lo(val), mask + 1, rel);
-    write16(loc, (read16(loc) & mask) | lo(val));
+    write16(ctx, loc, (read16(ctx, loc) & mask) | lo(val));
   } break;
   case R_PPC64_ADDR16_HA:
   case R_PPC64_REL16_HA:
@@ -1296,33 +1296,33 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
       writeFromHalf16(ctx, loc, NOP);
     else {
       checkInt(loc, val + 0x8000, 32, rel);
-      write16(loc, ha(val));
+      write16(ctx, loc, ha(val));
     }
     break;
   case R_PPC64_ADDR16_HI:
   case R_PPC64_REL16_HI:
   case R_PPC64_TPREL16_HI:
     checkInt(loc, val, 32, rel);
-    write16(loc, hi(val));
+    write16(ctx, loc, hi(val));
     break;
   case R_PPC64_ADDR16_HIGH:
-    write16(loc, hi(val));
+    write16(ctx, loc, hi(val));
     break;
   case R_PPC64_ADDR16_HIGHER:
   case R_PPC64_TPREL16_HIGHER:
-    write16(loc, higher(val));
+    write16(ctx, loc, higher(val));
     break;
   case R_PPC64_ADDR16_HIGHERA:
   case R_PPC64_TPREL16_HIGHERA:
-    write16(loc, highera(val));
+    write16(ctx, loc, highera(val));
     break;
   case R_PPC64_ADDR16_HIGHEST:
   case R_PPC64_TPREL16_HIGHEST:
-    write16(loc, highest(val));
+    write16(ctx, loc, highest(val));
     break;
   case R_PPC64_ADDR16_HIGHESTA:
   case R_PPC64_TPREL16_HIGHESTA:
-    write16(loc, highesta(val));
+    write16(ctx, loc, highesta(val));
     break;
   case R_PPC64_ADDR16_LO:
   case R_PPC64_REL16_LO:
@@ -1337,7 +1337,7 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
               "can't toc-optimize an update instruction: 0x" + utohexstr(insn));
       writeFromHalf16(ctx, loc, (insn & 0xffe00000) | 0x00020000 | lo(val));
     } else {
-      write16(loc, lo(val));
+      write16(ctx, loc, lo(val));
     }
     break;
   case R_PPC64_ADDR16_LO_DS:
@@ -1358,12 +1358,12 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
       insn &= 0xffe00000 | mask;
       writeFromHalf16(ctx, loc, insn | 0x00020000 | lo(val));
     } else {
-      write16(loc, (read16(loc) & mask) | lo(val));
+      write16(ctx, loc, (read16(ctx, loc) & mask) | lo(val));
     }
   } break;
   case R_PPC64_TPREL16:
     checkInt(loc, val, 16, rel);
-    write16(loc, val);
+    write16(ctx, loc, val);
     break;
   case R_PPC64_REL32:
     checkInt(loc, val, 32, rel);
@@ -1372,7 +1372,7 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   case R_PPC64_ADDR64:
   case R_PPC64_REL64:
   case R_PPC64_TOC:
-    write64(loc, val);
+    write64(ctx, loc, val);
     break;
   case R_PPC64_REL14: {
     uint32_t mask = 0x0000FFFC;
@@ -1390,7 +1390,7 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     break;
   }
   case R_PPC64_DTPREL64:
-    write64(loc, val - dynamicThreadPointerOffset);
+    write64(ctx, loc, val - dynamicThreadPointerOffset);
     break;
   case R_PPC64_DTPREL34:
     // The Dynamic Thread Vector actually points 0x8000 bytes past the start

diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 0b9daef951b216..372f7a3b9d7b02 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -462,11 +462,11 @@ static void writeInt(uint8_t *buf, uint64_t data, uint64_t size) {
   if (size == 1)
     *buf = data;
   else if (size == 2)
-    write16(buf, data);
+    write16(ctx, buf, data);
   else if (size == 4)
     write32(ctx, buf, data);
   else if (size == 8)
-    write64(buf, data);
+    write64(ctx, buf, data);
   else
     llvm_unreachable("unsupported Size argument");
 }

diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 01a7e9a7866c36..1288126328b4d2 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -60,12 +60,12 @@ using llvm::support::endian::write64le;
 constexpr size_t MergeNoTailSection::numShards;
 
 static uint64_t readUint(Ctx &ctx, uint8_t *buf) {
-  return ctx.arg.is64 ? read64(buf) : read32(ctx, buf);
+  return ctx.arg.is64 ? read64(ctx, buf) : read32(ctx, buf);
 }
 
 static void writeUint(Ctx &ctx, uint8_t *buf, uint64_t val) {
   if (ctx.arg.is64)
-    write64(buf, val);
+    write64(ctx, buf, val);
   else
     write32(ctx, buf, val);
 }
@@ -596,16 +596,16 @@ SmallVector<EhFrameSection::FdeData, 0> EhFrameSection::getFdeData() const {
 static uint64_t readFdeAddr(Ctx &ctx, uint8_t *buf, int size) {
   switch (size) {
   case DW_EH_PE_udata2:
-    return read16(buf);
+    return read16(ctx, buf);
   case DW_EH_PE_sdata2:
-    return (int16_t)read16(buf);
+    return (int16_t)read16(ctx, buf);
   case DW_EH_PE_udata4:
     return read32(ctx, buf);
   case DW_EH_PE_sdata4:
     return (int32_t)read32(ctx, buf);
   case DW_EH_PE_udata8:
   case DW_EH_PE_sdata8:
-    return read64(buf);
+    return read64(ctx, buf);
   case DW_EH_PE_absptr:
     return readUint(ctx, buf);
   }
@@ -3725,10 +3725,10 @@ void VersionDefinitionSection::writeOne(uint8_t *buf, uint32_t index,
   uint16_t flags = index == 1 ? VER_FLG_BASE : 0;
 
   // Write a verdef.
-  write16(buf, 1);                  // vd_version
-  write16(buf + 2, flags);          // vd_flags
-  write16(buf + 4, index);          // vd_ndx
-  write16(buf + 6, 1);              // vd_cnt
+  write16(ctx, buf, 1);                  // vd_version
+  write16(ctx, buf + 2, flags);          // vd_flags
+  write16(ctx, buf + 4, index);          // vd_ndx
+  write16(ctx, buf + 6, 1);              // vd_cnt
   write32(ctx, buf + 8, hashSysV(name)); // vd_hash
   write32(ctx, buf + 12, 20);            // vd_aux
   write32(ctx, buf + 16, 28);            // vd_next
@@ -3778,7 +3778,7 @@ void VersionTableSection::writeTo(uint8_t *buf) {
     // For an unextracted lazy symbol (undefined weak), it must have been
     // converted to Undefined and have VER_NDX_GLOBAL version here.
     assert(!s.sym->isLazy());
-    write16(buf, s.sym->versionId);
+    write16(ctx, buf, s.sym->versionId);
     buf += 2;
   }
 }
@@ -4358,8 +4358,9 @@ void PPC64LongBranchTargetSection::writeTo(uint8_t *buf) {
     assert(sym->getVA());
     // Need calls to branch to the local entry-point since a long-branch
     // must be a local-call.
-    write64(buf, sym->getVA(addend) +
-                     getPPC64GlobalEntryToLocalEntryOffset(sym->stOther));
+    write64(ctx, buf,
+            sym->getVA(addend) +
+                getPPC64GlobalEntryToLocalEntryOffset(sym->stOther));
     buf += 8;
   }
 }

diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index f94d3cf0552a84..bdf5bae80f42b2 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -292,7 +292,7 @@ inline void checkAlignment(uint8_t *loc, uint64_t v, int n,
 }
 
 // Endianness-aware read/write.
-inline uint16_t read16(const void *p) {
+inline uint16_t read16(Ctx &ctx, const void *p) {
   return llvm::support::endian::read16(p, ctx.arg.endianness);
 }
 
@@ -300,11 +300,11 @@ inline uint32_t read32(Ctx &ctx, const void *p) {
   return llvm::support::endian::read32(p, ctx.arg.endianness);
 }
 
-inline uint64_t read64(const void *p) {
+inline uint64_t read64(Ctx &ctx, const void *p) {
   return llvm::support::endian::read64(p, ctx.arg.endianness);
 }
 
-inline void write16(void *p, uint16_t v) {
+inline void write16(Ctx &ctx, void *p, uint16_t v) {
   llvm::support::endian::write16(p, v, ctx.arg.endianness);
 }
 
@@ -312,7 +312,7 @@ inline void write32(Ctx &ctx, void *p, uint32_t v) {
   llvm::support::endian::write32(p, v, ctx.arg.endianness);
 }
 
-inline void write64(void *p, uint64_t v) {
+inline void write64(Ctx &ctx, void *p, uint64_t v) {
   llvm::support::endian::write64(p, v, ctx.arg.endianness);
 }
 

diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index 16066926c860d5..611b632f826979 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -751,8 +751,8 @@ void ThumbThunk::writeTo(uint8_t *buf) {
   uint64_t s = getARMThunkDestVA(ctx, destination);
   uint64_t p = getThunkTargetSym()->getVA();
   int64_t offset = s - p - 4;
-  write16(buf + 0, 0xf000); // b.w S
-  write16(buf + 2, 0xb000);
+  write16(ctx, buf + 0, 0xf000); // b.w S
+  write16(ctx, buf + 2, 0xb000);
   ctx.target->relocateNoSym(buf, R_ARM_THM_JUMP24, offset);
 }
 
@@ -782,11 +782,11 @@ void ARMV7ABSLongThunk::addSymbols(ThunkSection &isec) {
 }
 
 void ThumbV7ABSLongThunk::writeLong(uint8_t *buf) {
-  write16(buf + 0, 0xf240); // movw ip, :lower16:S
-  write16(buf + 2, 0x0c00);
-  write16(buf + 4, 0xf2c0); // movt ip, :upper16:S
-  write16(buf + 6, 0x0c00);
-  write16(buf + 8, 0x4760); // bx   ip
+  write16(ctx, buf + 0, 0xf240); // movw ip, :lower16:S
+  write16(ctx, buf + 2, 0x0c00);
+  write16(ctx, buf + 4, 0xf2c0); // movt ip, :upper16:S
+  write16(ctx, buf + 6, 0x0c00);
+  write16(ctx, buf + 8, 0x4760); // bx   ip
   uint64_t s = getARMThunkDestVA(ctx, destination);
   ctx.target->relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, s);
   ctx.target->relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, s);
@@ -819,12 +819,12 @@ void ARMV7PILongThunk::addSymbols(ThunkSection &isec) {
 }
 
 void ThumbV7PILongThunk::writeLong(uint8_t *buf) {
-  write16(buf + 0, 0xf64f);   // P:  movw ip,:lower16:S - (P + (L1-P) + 4)
-  write16(buf + 2, 0x7cf4);
-  write16(buf + 4, 0xf2c0);   //     movt ip,:upper16:S - (P + (L1-P) + 4)
-  write16(buf + 6, 0x0c00);
-  write16(buf + 8, 0x44fc);   // L1: add  ip, pc
-  write16(buf + 10, 0x4760);  //     bx   ip
+  write16(ctx, buf + 0, 0xf64f); // P:  movw ip,:lower16:S - (P + (L1-P) + 4)
+  write16(ctx, buf + 2, 0x7cf4);
+  write16(ctx, buf + 4, 0xf2c0); //     movt ip,:upper16:S - (P + (L1-P) + 4)
+  write16(ctx, buf + 6, 0x0c00);
+  write16(ctx, buf + 8, 0x44fc);  // L1: add  ip, pc
+  write16(ctx, buf + 10, 0x4760); //     bx   ip
   uint64_t s = getARMThunkDestVA(ctx, destination);
   uint64_t p = getThunkTargetSym()->getVA() & ~0x1;
   int64_t offset = s - p - 12;
@@ -843,10 +843,10 @@ void ThumbV6MABSLongThunk::writeLong(uint8_t *buf) {
   // only register we can corrupt is r12 we must instead spill a low register
   // to the stack to use as a scratch register. We push r1 even though we
   // don't need to get some space to use for the return address.
-  write16(buf + 0, 0xb403);   // push {r0, r1} ; Obtain scratch registers
-  write16(buf + 2, 0x4801);   // ldr r0, [pc, #4] ; L1
-  write16(buf + 4, 0x9001);   // str r0, [sp, #4] ; SP + 4 = S
-  write16(buf + 6, 0xbd01);   // pop {r0, pc} ; restore r0 and branch to dest
+  write16(ctx, buf + 0, 0xb403); // push {r0, r1} ; Obtain scratch registers
+  write16(ctx, buf + 2, 0x4801); // ldr r0, [pc, #4] ; L1
+  write16(ctx, buf + 4, 0x9001); // str r0, [sp, #4] ; SP + 4 = S
+  write16(ctx, buf + 6, 0xbd01); // pop {r0, pc} ; restore r0 and branch to dest
   write32(ctx, buf + 8, 0x00000000); // L1: .word S
   uint64_t s = getARMThunkDestVA(ctx, destination);
   ctx.target->relocateNoSym(buf + 8, R_ARM_ABS32, s);
@@ -865,16 +865,17 @@ void ThumbV6MABSXOLongThunk::writeLong(uint8_t *buf) {
   // only register we can corrupt is r12 we must instead spill a low register
   // to the stack to use as a scratch register. We push r1 even though we
   // don't need to get some space to use for the return address.
-  write16(buf + 0, 0xb403);  // push {r0, r1} ; Obtain scratch registers
-  write16(buf + 2, 0x2000);  // movs r0, :upper8_15:S
-  write16(buf + 4, 0x0200);  // lsls r0, r0, #8
-  write16(buf + 6, 0x3000);  // adds r0, :upper0_7:S
-  write16(buf + 8, 0x0200);  // lsls r0, r0, #8
-  write16(buf + 10, 0x3000); // adds r0, :lower8_15:S
-  write16(buf + 12, 0x0200); // lsls r0, r0, #8
-  write16(buf + 14, 0x3000); // adds r0, :lower0_7:S
-  write16(buf + 16, 0x9001); // str r0, [sp, #4] ; SP + 4 = S
-  write16(buf + 18, 0xbd01); // pop {r0, pc} ; restore r0 and branch to dest
+  write16(ctx, buf + 0, 0xb403);  // push {r0, r1} ; Obtain scratch registers
+  write16(ctx, buf + 2, 0x2000);  // movs r0, :upper8_15:S
+  write16(ctx, buf + 4, 0x0200);  // lsls r0, r0, #8
+  write16(ctx, buf + 6, 0x3000);  // adds r0, :upper0_7:S
+  write16(ctx, buf + 8, 0x0200);  // lsls r0, r0, #8
+  write16(ctx, buf + 10, 0x3000); // adds r0, :lower8_15:S
+  write16(ctx, buf + 12, 0x0200); // lsls r0, r0, #8
+  write16(ctx, buf + 14, 0x3000); // adds r0, :lower0_7:S
+  write16(ctx, buf + 16, 0x9001); // str r0, [sp, #4] ; SP + 4 = S
+  write16(ctx, buf + 18,
+          0xbd01); // pop {r0, pc} ; restore r0 and branch to dest
   uint64_t s = getARMThunkDestVA(ctx, destination);
   ctx.target->relocateNoSym(buf + 2, R_ARM_THM_ALU_ABS_G3, s);
   ctx.target->relocateNoSym(buf + 6, R_ARM_THM_ALU_ABS_G2_NC, s);
@@ -892,12 +893,15 @@ void ThumbV6MPILongThunk::writeLong(uint8_t *buf) {
   // Most Thumb instructions cannot access the high registers r8 - r15. As the
   // only register we can corrupt is ip (r12) we must instead spill a low
   // register to the stack to use as a scratch register.
-  write16(buf + 0, 0xb401);   // P:  push {r0}        ; Obtain scratch register
-  write16(buf + 2, 0x4802);   //     ldr r0, [pc, #8] ; L2
-  write16(buf + 4, 0x4684);   //     mov ip, r0       ; high to low register
-  write16(buf + 6, 0xbc01);   //     pop {r0}         ; restore scratch register
-  write16(buf + 8, 0x44e7);   // L1: add pc, ip       ; transfer control
-  write16(buf + 10, 0x46c0);  //     nop              ; pad to 4-byte boundary
+  write16(ctx, buf + 0,
+          0xb401); // P:  push {r0}        ; Obtain scratch register
+  write16(ctx, buf + 2, 0x4802); //     ldr r0, [pc, #8] ; L2
+  write16(ctx, buf + 4, 0x4684); //     mov ip, r0       ; high to low register
+  write16(ctx, buf + 6,
+          0xbc01); //     pop {r0}         ; restore scratch register
+  write16(ctx, buf + 8, 0x44e7); // L1: add pc, ip       ; transfer control
+  write16(ctx, buf + 10,
+          0x46c0); //     nop              ; pad to 4-byte boundary
   write32(ctx, buf + 12, 0x00000000); // L2: .word S - (P + (L1 - P) + 4)
   uint64_t s = getARMThunkDestVA(ctx, destination);
   uint64_t p = getThunkTargetSym()->getVA() & ~0x1;
@@ -944,8 +948,9 @@ void ARMV4ABSLongBXThunk::addSymbols(ThunkSection &isec) {
 }
 
 void ThumbV4ABSLongBXThunk::writeLong(uint8_t *buf) {
-  write16(buf + 0, 0x4778); // bx pc
-  write16(buf + 2, 0xe7fd); // b #-6 ; Arm recommended sequence to follow bx pc
+  write16(ctx, buf + 0, 0x4778); // bx pc
+  write16(ctx, buf + 2,
+          0xe7fd); // b #-6 ; Arm recommended sequence to follow bx pc
   write32(ctx, buf + 4, 0xe51ff004); // ldr pc, [pc, #-4] ; L1
   write32(ctx, buf + 8, 0x00000000); // L1: .word S
   ctx.target->relocateNoSym(buf + 8, R_ARM_ABS32,
@@ -962,8 +967,9 @@ void ThumbV4ABSLongBXThunk::addSymbols(ThunkSection &isec) {
 }
 
 void ThumbV4ABSLongThunk::writeLong(uint8_t *buf) {
-  write16(buf + 0, 0x4778); // bx pc
-  write16(buf + 2, 0xe7fd); // b #-6 ; Arm recommended sequence to follow bx pc
+  write16(ctx, buf + 0, 0x4778); // bx pc
+  write16(ctx, buf + 2,
+          0xe7fd); // b #-6 ; Arm recommended sequence to follow bx pc
   write32(ctx, buf + 4, 0xe59fc000);  // ldr r12, [pc] ; L1
   write32(ctx, buf + 8, 0xe12fff1c);  // bx r12
   write32(ctx, buf + 12, 0x00000000); // L1: .word S
@@ -1016,8 +1022,9 @@ void ARMV4PILongThunk::addSymbols(ThunkSection &isec) {
 }
 
 void ThumbV4PILongBXThunk::writeLong(uint8_t *buf) {
-  write16(buf + 0, 0x4778); // P:  bx pc
-  write16(buf + 2, 0xe7fd); //     b #-6 ; Arm recommended sequence to follow bx pc
+  write16(ctx, buf + 0, 0x4778); // P:  bx pc
+  write16(ctx, buf + 2,
+          0xe7fd); //     b #-6 ; Arm recommended sequence to follow bx pc
   write32(ctx, buf + 4, 0xe59fc000);  //     ldr r12, [pc] ; L2
   write32(ctx, buf + 8, 0xe08cf00f);  // L1: add pc, r12, pc
   write32(ctx, buf + 12, 0x00000000); // L2: .word S - (P + (L1 - P) + 8)
@@ -1036,8 +1043,9 @@ void ThumbV4PILongBXThunk::addSymbols(ThunkSection &isec) {
 }
 
 void ThumbV4PILongThunk::writeLong(uint8_t *buf) {
-  write16(buf + 0, 0x4778); // P:  bx pc
-  write16(buf + 2, 0xe7fd); //     b #-6 ; Arm recommended sequence to follow bx pc
+  write16(ctx, buf + 0, 0x4778); // P:  bx pc
+  write16(ctx, buf + 2,
+          0xe7fd); //     b #-6 ; Arm recommended sequence to follow bx pc
   write32(ctx, buf + 4, 0xe59fc004);  //     ldr ip, [pc,#4] ; L2
   write32(ctx, buf + 8, 0xe08fc00c);  // L1: add ip, pc, ip
   write32(ctx, buf + 12, 0xe12fff1c); //     bx ip
@@ -1092,10 +1100,10 @@ InputSection *MipsThunk::getTargetInputSection() const {
 // to call PIC function from the non-PIC one.
 void MicroMipsThunk::writeTo(uint8_t *buf) {
   uint64_t s = destination.getVA();
-  write16(buf, 0x41b9);       // lui   $25, %hi(func)
-  write16(buf + 4, 0xd400);   // j     func
-  write16(buf + 8, 0x3339);   // addiu $25, $25, %lo(func)
-  write16(buf + 12, 0x0c00);  // nop
+  write16(ctx, buf, 0x41b9);      // lui   $25, %hi(func)
+  write16(ctx, buf + 4, 0xd400);  // j     func
+  write16(ctx, buf + 8, 0x3339);  // addiu $25, $25, %lo(func)
+  write16(ctx, buf + 12, 0x0c00); // nop
   ctx.target->relocateNoSym(buf, R_MICROMIPS_HI16, s);
   ctx.target->relocateNoSym(buf + 4, R_MICROMIPS_26_S1, s);
   ctx.target->relocateNoSym(buf + 8, R_MICROMIPS_LO16, s);
@@ -1118,9 +1126,9 @@ InputSection *MicroMipsThunk::getTargetInputSection() const {
 void MicroMipsR6Thunk::writeTo(uint8_t *buf) {
   uint64_t s = destination.getVA();
   uint64_t p = getThunkTargetSym()->getVA();
-  write16(buf, 0x1320);       // lui   $25, %hi(func)
-  write16(buf + 4, 0x3339);   // addiu $25, $25, %lo(func)
-  write16(buf + 8, 0x9400);   // bc    func
+  write16(ctx, buf, 0x1320);     // lui   $25, %hi(func)
+  write16(ctx, buf + 4, 0x3339); // addiu $25, $25, %lo(func)
+  write16(ctx, buf + 8, 0x9400); // bc    func
   ctx.target->relocateNoSym(buf, R_MICROMIPS_HI16, s);
   ctx.target->relocateNoSym(buf + 4, R_MICROMIPS_LO16, s);
   ctx.target->relocateNoSym(buf + 8, R_MICROMIPS_PC26_S1, s - p - 12);


        


More information about the llvm-commits mailing list