[lld] e35929e - [LLD][ELF][ARM] Refactor inBranchRange to use addend for PC Bias

Peter Smith via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 2 03:06:04 PST 2021


Author: Peter Smith
Date: 2021-03-02T11:02:33Z
New Revision: e35929e02664090667b0f9b8f4633fa005636f68

URL: https://github.com/llvm/llvm-project/commit/e35929e02664090667b0f9b8f4633fa005636f68
DIFF: https://github.com/llvm/llvm-project/commit/e35929e02664090667b0f9b8f4633fa005636f68.diff

LOG: [LLD][ELF][ARM] Refactor inBranchRange to use addend for PC Bias

In AArch32 ARM, the PC reads two instructions ahead of the currently
executiing instruction. This evaluates to 8 in ARM state and 4 in
Thumb state. Branch instructions on AArch32 compensate for this by
subtracting the PC bias from the addend. For a branch to symbol this
will result in an addend of -8 in ARM state and -4 in Thumb state.

The existing ARM Target::inBranchRange function accounted for this
implict addend within the function meaning that if the addend were
to be taken into account by the caller then it would be double
counted. This complicates the interface for all Targets as callers
wanting to account for addends had to account for the ARM PC-bias.

In certain situations such as:
https://github.com/ClangBuiltLinux/linux/issues/1305
the PC-bias compensation code didn't match up. In particular
normalizeExistingThunk() didn't put the PC-bias back in as Arm
thunks did not store the addend.

The simplest fix for the problem is to add the PC bias in
normalizeExistingThunk when restoring the addend. However I think
it is worth refactoring the Arm inBranchRange implementation so
that fewer calls to getPCBias are needed for other Targets. I
wasn't able to remove getPCBias completely but hopefully the
Relocations.cpp code is simpler now.

In principle a test could be written to replicate the linux kernel
build failure but I wasn't able to reproduce with a small example
that I could build up from scratch.

Fixes https://github.com/ClangBuiltLinux/linux/issues/1305

Differential Revision: https://reviews.llvm.org/D97550

Added: 
    lld/test/ELF/arm-thunk-arm-thumb-reuse.s

Modified: 
    lld/ELF/Arch/ARM.cpp
    lld/ELF/Relocations.cpp
    lld/ELF/Relocations.h
    lld/ELF/Thunks.cpp

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index e90e7efd34bd..13a7001e2296 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -279,7 +279,7 @@ void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
 
 bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
                      uint64_t branchAddr, const Symbol &s,
-                     int64_t /*a*/) const {
+                     int64_t a) const {
   // If S is an undefined weak symbol and does not have a PLT entry then it
   // will be resolved as a branch to the next instruction.
   if (s.isUndefWeak() && !s.isInPlt())
@@ -298,7 +298,7 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
     LLVM_FALLTHROUGH;
   case R_ARM_CALL: {
     uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA();
-    return !inBranchRange(type, branchAddr, dst);
+    return !inBranchRange(type, branchAddr, dst + a);
   }
   case R_ARM_THM_JUMP19:
   case R_ARM_THM_JUMP24:
@@ -309,7 +309,7 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
     LLVM_FALLTHROUGH;
   case R_ARM_THM_CALL: {
     uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA();
-    return !inBranchRange(type, branchAddr, dst);
+    return !inBranchRange(type, branchAddr, dst + a);
   }
   }
   return false;
@@ -350,46 +350,31 @@ uint32_t ARM::getThunkSectionSpacing() const {
 }
 
 bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
-  uint64_t range;
-  uint64_t instrSize;
+  if ((dst & 0x1) == 0)
+    // Destination is ARM, if ARM caller then Src is already 4-byte aligned.
+    // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure
+    // destination will be 4 byte aligned.
+    src &= ~0x3;
+  else
+    // Bit 0 == 1 denotes Thumb state, it is not part of the range.
+    dst &= ~0x1;
 
+  int64_t offset = dst - src;
   switch (type) {
   case R_ARM_PC24:
   case R_ARM_PLT32:
   case R_ARM_JUMP24:
   case R_ARM_CALL:
-    range = 0x2000000;
-    instrSize = 4;
-    break;
+    return llvm::isInt<26>(offset);
   case R_ARM_THM_JUMP19:
-    range = 0x100000;
-    instrSize = 2;
-    break;
+    return llvm::isInt<21>(offset);
   case R_ARM_THM_JUMP24:
   case R_ARM_THM_CALL:
-    range = config->armJ1J2BranchEncoding ? 0x1000000 : 0x400000;
-    instrSize = 2;
-    break;
+    return config->armJ1J2BranchEncoding ? llvm::isInt<25>(offset)
+                                         : llvm::isInt<23>(offset);
   default:
     return true;
   }
-  // PC at Src is 2 instructions ahead, immediate of branch is signed
-  if (src > dst)
-    range -= 2 * instrSize;
-  else
-    range += instrSize;
-
-  if ((dst & 0x1) == 0)
-    // Destination is ARM, if ARM caller then Src is already 4-byte aligned.
-    // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure
-    // destination will be 4 byte aligned.
-    src &= ~0x3;
-  else
-    // Bit 0 == 1 denotes Thumb state, it is not part of the range
-    dst &= ~0x1;
-
-  uint64_t distance = (src > dst) ? src - dst : dst - src;
-  return distance <= range;
 }
 
 // Helper to produce message text when LLD detects that a CALL relocation to

diff  --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index b4847dace4f7..8e4af09cfe02 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1763,14 +1763,17 @@ void ThunkCreator::mergeThunks(ArrayRef<OutputSection *> outputSections) {
 // Find or create a ThunkSection within the InputSectionDescription (ISD) that
 // is in range of Src. An ISD maps to a range of InputSections described by a
 // linker script section pattern such as { .text .text.* }.
-ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *os, InputSection *isec,
+ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *os,
+                                           InputSection *isec,
                                            InputSectionDescription *isd,
-                                           uint32_t type, uint64_t src) {
+                                           const Relocation &rel,
+                                           uint64_t src) {
   for (std::pair<ThunkSection *, uint32_t> tp : isd->thunkSections) {
     ThunkSection *ts = tp.first;
-    uint64_t tsBase = os->addr + ts->outSecOff;
-    uint64_t tsLimit = tsBase + ts->getSize();
-    if (target->inBranchRange(type, src, (src > tsLimit) ? tsBase : tsLimit))
+    uint64_t tsBase = os->addr + ts->outSecOff + rel.addend;
+    uint64_t tsLimit = tsBase + ts->getSize() + rel.addend;
+    if (target->inBranchRange(rel.type, src,
+                              (src > tsLimit) ? tsBase : tsLimit))
       return ts;
   }
 
@@ -1780,9 +1783,11 @@ ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *os, InputSection *isec
   // possible. Error if InputSection is so large we cannot place ThunkSection
   // anywhere in Range.
   uint64_t thunkSecOff = isec->outSecOff;
-  if (!target->inBranchRange(type, src, os->addr + thunkSecOff)) {
+  if (!target->inBranchRange(rel.type, src,
+                             os->addr + thunkSecOff + rel.addend)) {
     thunkSecOff = isec->outSecOff + isec->getSize();
-    if (!target->inBranchRange(type, src, os->addr + thunkSecOff))
+    if (!target->inBranchRange(rel.type, src,
+                               os->addr + thunkSecOff + rel.addend))
       fatal("InputSection too large for range extension thunk " +
             isec->getObjMsg(src - (os->addr + isec->outSecOff)));
   }
@@ -1933,7 +1938,11 @@ static int64_t getPCBias(RelType type) {
 std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec,
                                                 Relocation &rel, uint64_t src) {
   std::vector<Thunk *> *thunkVec = nullptr;
-  int64_t addend = rel.addend + getPCBias(rel.type);
+  // Arm and Thumb have a PC Bias of 8 and 4 respectively, this is cancelled
+  // out in the relocation addend. We compensate for the PC bias so that
+  // an Arm and Thumb relocation to the same destination get the same keyAddend,
+  // which is usually 0.
+  int64_t keyAddend = rel.addend + getPCBias(rel.type);
 
   // We use a ((section, offset), addend) pair to find the thunk position if
   // possible so that we create only one thunk for aliased symbols or ICFed
@@ -1943,17 +1952,16 @@ std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec,
   if (auto *d = dyn_cast<Defined>(rel.sym))
     if (!d->isInPlt() && d->section)
       thunkVec = &thunkedSymbolsBySectionAndAddend[{
-          {d->section->repl, d->value}, addend}];
+          {d->section->repl, d->value}, keyAddend}];
   if (!thunkVec)
-    thunkVec = &thunkedSymbols[{rel.sym, addend}];
+    thunkVec = &thunkedSymbols[{rel.sym, keyAddend}];
 
   // Check existing Thunks for Sym to see if they can be reused
   for (Thunk *t : *thunkVec)
     if (isThunkSectionCompatible(isec, t->getThunkTargetSym()->section) &&
         t->isCompatibleWith(*isec, rel) &&
         target->inBranchRange(rel.type, src,
-                              t->getThunkTargetSym()->getVA(rel.addend) +
-                                  getPCBias(rel.type)))
+                              t->getThunkTargetSym()->getVA(rel.addend)))
       return std::make_pair(t, false);
 
   // No existing compatible Thunk in range, create a new one
@@ -1968,8 +1976,7 @@ std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec,
 // relocation back to its original non-Thunk target.
 bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) {
   if (Thunk *t = thunks.lookup(rel.sym)) {
-    if (target->inBranchRange(rel.type, src,
-                              rel.sym->getVA(rel.addend) + getPCBias(rel.type)))
+    if (target->inBranchRange(rel.type, src, rel.sym->getVA(rel.addend)))
       return true;
     rel.sym = &t->destination;
     rel.addend = t->addend;
@@ -2041,7 +2048,7 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) {
               if (auto *tis = t->getTargetInputSection())
                 ts = getISThunkSec(tis);
               else
-                ts = getISDThunkSec(os, isec, isd, rel.type, src);
+                ts = getISDThunkSec(os, isec, isd, rel, src);
               ts->addThunk(t);
               thunks[t->getThunkTargetSym()] = t;
             }

diff  --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 2c7d790d154c..dd0c9c385d12 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -148,8 +148,8 @@ class ThunkCreator {
   void mergeThunks(ArrayRef<OutputSection *> outputSections);
 
   ThunkSection *getISDThunkSec(OutputSection *os, InputSection *isec,
-                               InputSectionDescription *isd, uint32_t type,
-                               uint64_t src);
+                               InputSectionDescription *isd,
+                               const Relocation &rel, uint64_t src);
 
   ThunkSection *getISThunkSec(InputSection *isec);
 

diff  --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index 6ac895051389..1fb3abaa60f5 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -72,7 +72,7 @@ class AArch64ADRPThunk final : public Thunk {
 // if the target is in range, otherwise it creates a long thunk.
 class ARMThunk : public Thunk {
 public:
-  ARMThunk(Symbol &dest) : Thunk(dest, 0) {}
+  ARMThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {}
 
   bool getMayUseShortThunk();
   uint32_t size() override { return getMayUseShortThunk() ? 4 : sizeLong(); }
@@ -102,7 +102,9 @@ class ARMThunk : public Thunk {
 // which has a range of 16MB.
 class ThumbThunk : public Thunk {
 public:
-  ThumbThunk(Symbol &dest) : Thunk(dest, 0) { alignment = 2; }
+  ThumbThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {
+    alignment = 2;
+  }
 
   bool getMayUseShortThunk();
   uint32_t size() override { return getMayUseShortThunk() ? 4 : sizeLong(); }
@@ -125,7 +127,7 @@ class ThumbThunk : public Thunk {
 // Source State, TargetState, Target Requirement, ABS or PI, Range
 class ARMV7ABSLongThunk final : public ARMThunk {
 public:
-  ARMV7ABSLongThunk(Symbol &dest) : ARMThunk(dest) {}
+  ARMV7ABSLongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {}
 
   uint32_t sizeLong() override { return 12; }
   void writeLong(uint8_t *buf) override;
@@ -134,7 +136,7 @@ class ARMV7ABSLongThunk final : public ARMThunk {
 
 class ARMV7PILongThunk final : public ARMThunk {
 public:
-  ARMV7PILongThunk(Symbol &dest) : ARMThunk(dest) {}
+  ARMV7PILongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {}
 
   uint32_t sizeLong() override { return 16; }
   void writeLong(uint8_t *buf) override;
@@ -143,7 +145,8 @@ class ARMV7PILongThunk final : public ARMThunk {
 
 class ThumbV7ABSLongThunk final : public ThumbThunk {
 public:
-  ThumbV7ABSLongThunk(Symbol &dest) : ThumbThunk(dest) {}
+  ThumbV7ABSLongThunk(Symbol &dest, int64_t addend)
+      : ThumbThunk(dest, addend) {}
 
   uint32_t sizeLong() override { return 10; }
   void writeLong(uint8_t *buf) override;
@@ -152,7 +155,7 @@ class ThumbV7ABSLongThunk final : public ThumbThunk {
 
 class ThumbV7PILongThunk final : public ThumbThunk {
 public:
-  ThumbV7PILongThunk(Symbol &dest) : ThumbThunk(dest) {}
+  ThumbV7PILongThunk(Symbol &dest, int64_t addend) : ThumbThunk(dest, addend) {}
 
   uint32_t sizeLong() override { return 12; }
   void writeLong(uint8_t *buf) override;
@@ -166,7 +169,7 @@ class ThumbV7PILongThunk final : public ThumbThunk {
 // can result in a thunk
 class ARMV5ABSLongThunk final : public ARMThunk {
 public:
-  ARMV5ABSLongThunk(Symbol &dest) : ARMThunk(dest) {}
+  ARMV5ABSLongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {}
 
   uint32_t sizeLong() override { return 8; }
   void writeLong(uint8_t *buf) override;
@@ -177,7 +180,7 @@ class ARMV5ABSLongThunk final : public ARMThunk {
 
 class ARMV5PILongThunk final : public ARMThunk {
 public:
-  ARMV5PILongThunk(Symbol &dest) : ARMThunk(dest) {}
+  ARMV5PILongThunk(Symbol &dest, int64_t addend) : ARMThunk(dest, addend) {}
 
   uint32_t sizeLong() override { return 16; }
   void writeLong(uint8_t *buf) override;
@@ -189,7 +192,8 @@ class ARMV5PILongThunk final : public ARMThunk {
 // Implementations of Thunks for Arm v6-M. Only Thumb instructions are permitted
 class ThumbV6MABSLongThunk final : public ThumbThunk {
 public:
-  ThumbV6MABSLongThunk(Symbol &dest) : ThumbThunk(dest) {}
+  ThumbV6MABSLongThunk(Symbol &dest, int64_t addend)
+      : ThumbThunk(dest, addend) {}
 
   uint32_t sizeLong() override { return 12; }
   void writeLong(uint8_t *buf) override;
@@ -198,7 +202,8 @@ class ThumbV6MABSLongThunk final : public ThumbThunk {
 
 class ThumbV6MPILongThunk final : public ThumbThunk {
 public:
-  ThumbV6MPILongThunk(Symbol &dest) : ThumbThunk(dest) {}
+  ThumbV6MPILongThunk(Symbol &dest, int64_t addend)
+      : ThumbThunk(dest, addend) {}
 
   uint32_t sizeLong() override { return 16; }
   void writeLong(uint8_t *buf) override;
@@ -1040,7 +1045,7 @@ static Thunk *addThunkAArch64(RelType type, Symbol &s, int64_t a) {
 // - MOVT and MOVW instructions cannot be used
 // - Only Thumb relocation that can generate a Thunk is a BL, this can always
 //   be transformed into a BLX
-static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s) {
+static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s, int64_t a) {
   switch (reloc) {
   case R_ARM_PC24:
   case R_ARM_PLT32:
@@ -1048,8 +1053,8 @@ static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s) {
   case R_ARM_CALL:
   case R_ARM_THM_CALL:
     if (config->picThunk)
-      return make<ARMV5PILongThunk>(s);
-    return make<ARMV5ABSLongThunk>(s);
+      return make<ARMV5PILongThunk>(s, a);
+    return make<ARMV5ABSLongThunk>(s, a);
   }
   fatal("relocation " + toString(reloc) + " to " + toString(s) +
         " not supported for Armv5 or Armv6 targets");
@@ -1060,21 +1065,21 @@ static Thunk *addThunkPreArmv7(RelType reloc, Symbol &s) {
 // - MOVT and MOVW instructions cannot be used.
 // - Only a limited number of instructions can access registers r8 and above
 // - No interworking support is needed (all Thumb).
-static Thunk *addThunkV6M(RelType reloc, Symbol &s) {
+static Thunk *addThunkV6M(RelType reloc, Symbol &s, int64_t a) {
   switch (reloc) {
   case R_ARM_THM_JUMP19:
   case R_ARM_THM_JUMP24:
   case R_ARM_THM_CALL:
     if (config->isPic)
-      return make<ThumbV6MPILongThunk>(s);
-    return make<ThumbV6MABSLongThunk>(s);
+      return make<ThumbV6MPILongThunk>(s, a);
+    return make<ThumbV6MABSLongThunk>(s, a);
   }
   fatal("relocation " + toString(reloc) + " to " + toString(s) +
         " not supported for Armv6-M targets");
 }
 
 // Creates a thunk for Thumb-ARM interworking or branch range extension.
-static Thunk *addThunkArm(RelType reloc, Symbol &s) {
+static Thunk *addThunkArm(RelType reloc, Symbol &s, int64_t a) {
   // Decide which Thunk is needed based on:
   // Available instruction set
   // - An Arm Thunk can only be used if Arm state is available.
@@ -1093,8 +1098,8 @@ static Thunk *addThunkArm(RelType reloc, Symbol &s) {
   // architecture to flag.
   if (!config->armHasMovtMovw) {
     if (!config->armJ1J2BranchEncoding)
-      return addThunkPreArmv7(reloc, s);
-    return addThunkV6M(reloc, s);
+      return addThunkPreArmv7(reloc, s, a);
+    return addThunkV6M(reloc, s, a);
   }
 
   switch (reloc) {
@@ -1103,14 +1108,14 @@ static Thunk *addThunkArm(RelType reloc, Symbol &s) {
   case R_ARM_JUMP24:
   case R_ARM_CALL:
     if (config->picThunk)
-      return make<ARMV7PILongThunk>(s);
-    return make<ARMV7ABSLongThunk>(s);
+      return make<ARMV7PILongThunk>(s, a);
+    return make<ARMV7ABSLongThunk>(s, a);
   case R_ARM_THM_JUMP19:
   case R_ARM_THM_JUMP24:
   case R_ARM_THM_CALL:
     if (config->picThunk)
-      return make<ThumbV7PILongThunk>(s);
-    return make<ThumbV7ABSLongThunk>(s);
+      return make<ThumbV7PILongThunk>(s, a);
+    return make<ThumbV7ABSLongThunk>(s, a);
   }
   fatal("unrecognized relocation type");
 }
@@ -1166,7 +1171,7 @@ Thunk *elf::addThunk(const InputSection &isec, Relocation &rel) {
     return addThunkAArch64(rel.type, s, a);
 
   if (config->emachine == EM_ARM)
-    return addThunkArm(rel.type, s);
+    return addThunkArm(rel.type, s, a);
 
   if (config->emachine == EM_MIPS)
     return addThunkMips(rel.type, s);

diff  --git a/lld/test/ELF/arm-thunk-arm-thumb-reuse.s b/lld/test/ELF/arm-thunk-arm-thumb-reuse.s
new file mode 100644
index 000000000000..5b77c4fcf567
--- /dev/null
+++ b/lld/test/ELF/arm-thunk-arm-thumb-reuse.s
@@ -0,0 +1,61 @@
+// REQUIRES: arm
+// RUN: split-file %s %t
+// RUN: llvm-mc -arm-add-build-attributes -filetype=obj -triple=thumbv7a-none-linux-gnueabi %t/test.s -o %t.o
+// RUN: ld.lld --script %t/script %t.o -o %t2
+// RUN: llvm-objdump --no-show-raw-insn -d %t2 | FileCheck %s
+
+/// Test that we can reuse thunks between Arm and Thumb callers
+/// using a BL. Expect two thunks, one for far, one for far2.
+
+//--- script
+SECTIONS {
+        .text 0x10000 : { *(.text) }
+        .text.far 0x10000000 : AT (0x10000000) { *(.far) }
+}
+
+//--- test.s
+
+.syntax unified
+.text
+.globl _start
+.type _start, %function
+ .arm
+_start:
+ bl far
+ .thumb
+ bl far
+ bl far2
+ .arm
+ bl far2
+
+// CHECK:   00010000 <_start>:
+// CHECK-NEXT: 10000: bl      #8 <__ARMv7ABSLongThunk_far>
+// CHECK:   00010004 <$t.1>:
+// CHECK-NEXT: 10004: blx     #8
+// CHECK-NEXT: 10008: bl      #16
+// CHECK:   0001000c <$a.2>:
+// CHECK-NEXT: 1000c: blx     #8 <__Thumbv7ABSLongThunk_far2>
+// CHECK:   00010010 <__ARMv7ABSLongThunk_far>:
+// CHECK-NEXT: 10010: movw    r12, #0
+// CHECK-NEXT: 10014: movt    r12, #4096
+// CHECK-NEXT: 10018: bx      r12
+// CHECK:   0001001c <__Thumbv7ABSLongThunk_far2>:
+// CHECK-NEXT: 1001c: movw    r12, #4
+// CHECK-NEXT: 10020: movt    r12, #4096
+// CHECK-NEXT: 10024: bx      r12
+
+.section .text.far, "ax", %progbits
+.globl far
+.type far, %function
+far:
+ bx lr
+.globl far2
+.type far2, %function
+far2:
+ bx lr
+
+// CHECK: Disassembly of section .text.far:
+// CHECK:      10000000 <far>:
+// CHECK-NEXT: 10000000: bx      lr
+// CHECK:      10000004 <far2>:
+// CHECK-NEXT: 10000004: bx      lr


        


More information about the llvm-commits mailing list