[lld] [lld][ELF] Add range extension thunks for x86-64 (PR #180266)

Wed Feb 11 06:47:12 PST 2026

================
@@ -1234,6 +1283,97 @@ void ThumbV4PILongThunk::addLongMapSyms() {
   addSymbol("$d", STT_NOTYPE, 16, *tsec);
 }
 
+// x86-64 Thunk base class.
+// For x86-64, the thunk's addend comes from the original R_X86_64_PLT32
+// relocation. That addend includes a -4 PC-relative compensation (since the
+// CPU adds 4 for the displacement field size during PC-relative addressing).
+// When computing the thunk's jump target, we must add back this +4 to get
+// the actual destination address. For example, a call to a local symbol at
+// section+0x100 produces addend = 0x100 - 4 = 0xFC, so the thunk target
+// should be section.getVA(0xFC + 4) = section + 0x100.
+static uint64_t getX86_64ThunkDestVA(Ctx &ctx, const Symbol &s, int64_t a) {
+  if (s.isInPlt(ctx)) // A PLT entry, when present, is the destination.
+    return s.getPltVA(ctx); // The addend is not applied to the PLT address.
+  // No PLT entry: use the symbol VA; +4 undoes the addend's -4 PC-relative bias.
+  return s.getVA(ctx, a + 4);
+}
+
+bool X86_64Thunk::getMayUseShortThunk() {
+  if (!mayUseShortThunk) // A cached negative answer is returned as-is.
+    return false;
+  uint64_t s = getX86_64ThunkDestVA(ctx, destination, addend);
+  uint64_t p = getThunkTargetSym()->getVA(ctx); // Address of this thunk.
+  // rel32 is relative to the end of the 5-byte jmp: test target - (thunk + 5).
+  mayUseShortThunk = llvm::isInt<32>(s - p - 5); // Cache for later queries.
+  return mayUseShortThunk;
+}
+
+void X86_64Thunk::writeTo(uint8_t *buf) {
+  if (!getMayUseShortThunk()) {
+    writeLong(buf); // Short form unavailable: emit the long sequence instead.
+    return;
+  }
+  // Short thunk: a single 5-byte `jmp rel32` to the destination.
+  uint64_t s = getX86_64ThunkDestVA(ctx, destination, addend);
+  uint64_t p = getThunkTargetSym()->getVA(ctx);
+  buf[0] = 0xe9; // jmp rel32 opcode
+  write32le(buf + 1, static_cast<uint32_t>(s - p - 5)); // rel32 from jmp end
+}
+
+// x86-64 long range thunk implementation.
+// Uses a position-independent RIP-relative offset sequence:
+//   movabsq $offset, %r11  ; 49 BB xx xx xx xx xx xx xx xx  (10 bytes)
+//   leaq (%rip), %r10      ; 4C 8D 15 00 00 00 00          (7 bytes)
+//   addq %r10, %r11        ; 4D 01 D3                      (3 bytes)
+//   jmp *%r11              ; 41 FF E3                      (3 bytes)
+//
+// The leaq captures the RIP (address of the next instruction, i.e. addq),
+// so offset = target - (thunk_address + 10 + 7) = target - (thunk_address +
+// 17).
+void X86_64LongThunk::writeLong(uint8_t *buf) {
+  // movabsq $offset, %r11: REX.W+B prefix (0x49), opcode B8+r with r=3 (0xbb).
+  buf[0] = 0x49;
+  buf[1] = 0xbb;

+  uint64_t target = getX86_64ThunkDestVA(ctx, destination, addend);
+  uint64_t thunkAddr = getThunkTargetSym()->getVA(ctx);
+  // RIP after leaq points to the addq instruction at thunkAddr + 17.
+  uint64_t offset = target - (thunkAddr + 17);
+  write64le(buf + 2, offset); // imm64 operand of movabsq, bytes 2..9

+  // leaq (%rip), %r10  ; RIP-relative with 0 displacement
+  buf[10] = 0x4c;         // REX.WR prefix (W=1, R=1 for r10)
+  buf[11] = 0x8d;         // lea
+  buf[12] = 0x15;         // ModRM: mod=00, reg=2 (r10), rm=5 (RIP-relative)
+  write32le(buf + 13, 0); // disp32 = 0

+  // addq %r10, %r11  ; r11 = offset + (thunkAddr + 17) = target
+  buf[17] = 0x4d; // REX.WRB prefix (W=1, R=1 for r10, B=1 for r11)
+  buf[18] = 0x01; // add r/m64, r64
+  buf[19] = 0xd3; // ModRM: mod=11, reg=2 (r10), rm=3 (r11)

+  // jmp *%r11  ; last instruction, the whole sequence is 23 bytes
+  buf[20] = 0x41; // REX.B prefix (B=1 for r11)
+  buf[21] = 0xff; // jmp r/m64 (opcode FF /4)
+  buf[22] = 0xe3; // ModRM: mod=11, reg=4 (jmp), rm=3 (r11)
+}
+
+void X86_64LongThunk::addSymbols(ThunkSection &isec) {
+  StringRef name = destination.getName();
+  // When the destination is a STT_SECTION symbol (e.g. from a relocation
+  // against a local symbol), the name may be empty. Include the addend in
+  // the thunk name to disambiguate thunks targeting different offsets within
+  // the same section. We add 4 to display the actual offset (undoing the -4
+  // PC-relative compensation baked into x86-64 R_X86_64_PLT32 addends).
+  if (name.empty() || destination.isSection()) {
+    addSymbol(ctx.saver.save("__X86_64LongThunk_" + name + "_" +
----------------
smithp35 wrote:

The section symbol often has no name, so you may want to use the section name rather than the section symbol's name. I also think the addend would be relative to the input section rather than the output section. If I'm right, there could be multiple `_name_<addend>` thunk symbols with the same offset. For example, if `-ffunction-sections` weren't used, there would be lots of `.text` input sections within one output section:
```
.text /* output section */
  .text /* input section from foo.o */
  .text /* input section from bar.o */
```

For Arm/AArch64 we didn't bother accounting for this. This does lead to a lot of identical thunk names with an empty prefix, but it is usually easy to work out the target by looking at the code in the disassembly.

https://github.com/llvm/llvm-project/pull/180266