[lld] [lld] Support thumb PLTs (PR #86223)

Sun Mar 24 16:02:38 PDT 2024

================
@@ -231,36 +231,67 @@ static void writePltHeaderLong(uint8_t *buf) {
 // The default PLT header requires the .got.plt to be within 128 Mb of the
 // .plt in the positive direction.
 void ARM::writePltHeader(uint8_t *buf) const {
-  // Use a similar sequence to that in writePlt(), the difference is the calling
-  // conventions mean we use lr instead of ip. The PLT entry is responsible for
-  // saving lr on the stack, the dynamic loader is responsible for reloading
-  // it.
-  const uint32_t pltData[] = {
-      0xe52de004, // L1: str lr, [sp,#-4]!
-      0xe28fe600, //     add lr, pc,  #0x0NN00000 &(.got.plt - L1 - 4)
-      0xe28eea00, //     add lr, lr,  #0x000NN000 &(.got.plt - L1 - 4)
-      0xe5bef000, //     ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
-  };
-
-  uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
-  if (!llvm::isUInt<27>(offset)) {
-    // We cannot encode the Offset, use the long form.
-    writePltHeaderLong(buf);
-    return;
+  if (!config->armThumbPLTs) {
+    // Use a similar sequence to that in writePlt(), the difference is the calling
+    // conventions mean we use lr instead of ip. The PLT entry is responsible for
+    // saving lr on the stack, the dynamic loader is responsible for reloading
+    // it.
+    const uint32_t pltData[] = {
+        0xe52de004, // L1: str lr, [sp,#-4]!
+        0xe28fe600, //     add lr, pc,  #0x0NN00000 &(.got.plt - L1 - 4)
+        0xe28eea00, //     add lr, lr,  #0x000NN000 &(.got.plt - L1 - 4)
+        0xe5bef000, //     ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
+    };
+
+    uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
+    if (!llvm::isUInt<27>(offset)) {
+      // We cannot encode the Offset, use the long form.
+      writePltHeaderLong(buf);
+      return;
+    }
+    write32(buf + 0, pltData[0]);
+    write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
+    write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
+    write32(buf + 12, pltData[3] | (offset & 0xfff));
+    memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
+    memcpy(buf + 20, trapInstr.data(), 4);
+    memcpy(buf + 24, trapInstr.data(), 4);
+    memcpy(buf + 28, trapInstr.data(), 4);
+  } else {
+    // The instruction sequence for thumb:
+    //
+    // 0: b500          push    {lr}
+    // 2: f8df e008     ldr.w   lr, [pc, #0x8]          @ 0xe <func+0xe>
+    // 6: 44fe          add     lr, pc
+    // 8: f85e ff08     ldr     pc, [lr, #8]!
+    // e:               .word   .got.plt - .plt - 16
+    //
+    // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from
+    // `pc` in the add instruction and 8 bytes for the `lr` adjustment.
+    //
+    uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16;
+    assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
+    write16(buf + 0, 0xb500);
+    write32(buf + 2, 0xe008f8df);
----------------
smithp35 wrote:

I don't think this works big-endian as the 4-byte instructions are 2 half-word pairs. I.e. instead of `0xff08f85e` this is `0xff08` `0xf85e` so when endian reversed we get `0x08ff` `0x5ef8` which is different from `5ef808ff`.
 
You'll need to use 2 write16() calls with half of the instruction in each. See the Thumb implementations in Thunks.cpp.

The way Arm (32-bit) does big-endian is strange, even though Cortex-M33 will always use little-endian instructions in the executable ELF file, they will be big-endian in the ELF relocatable ELF file and then reversed at prior to writing.


https://github.com/llvm/llvm-project/pull/86223