[lld] 29c1361 - [LLD][ELF][ARM] Do not substitute BL/BLX for non STT_FUNC symbols.

Peter Smith via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 13 01:45:43 PST 2020


Author: Peter Smith
Date: 2020-02-13T09:40:21Z
New Revision: 29c13615576b80129c8167df1d77f1ce98a97ae7

URL: https://github.com/llvm/llvm-project/commit/29c13615576b80129c8167df1d77f1ce98a97ae7
DIFF: https://github.com/llvm/llvm-project/commit/29c13615576b80129c8167df1d77f1ce98a97ae7.diff

LOG: [LLD][ELF][ARM] Do not substitute BL/BLX for non STT_FUNC symbols.

Recommit of 0b4a047bfbd11fe1f5abda8da0e2391c1918162a
(reverted in c29003813ab9bd6ea7b6de40ea8f1fe21979f13f) to incorporate
subsequent fix and add a warning when LLD's interworking behavior has
changed.

D73474 disabled the generation of interworking thunks for branch
relocations to non STT_FUNC symbols. This patch handles the case of BL and
BLX instructions to non STT_FUNC symbols. LLD would normally look at the
state of the caller and the callee and write a BL if the states are the
same and a BLX if the states are different.

This patch disables BL/BLX substitution when the destination symbol does
not have type STT_FUNC. This brings our behavior in line with GNU ld which
may prevent difficult to diagnose runtime errors when switching to lld.

This change does change how LLD handles interworking of symbols that do not
have type STT_FUNC from previous versions including the 10.0 release. This
brings LLD in line with ld.bfd but there may be programs that have not been
linked with ld.bfd that depend on LLD's previous behavior. We emit a warning
when the behavior changes.

A summary of the difference between 10.0 and 11.0 is that for symbols
that do not have a type of STT_FUNC LLD will not change a BL to a BLX or
vice versa. The table below enumerates the changes
| relocation     | STT_FUNC | bit(0) | in  | 10.0- out | 11.0+ out |
| R_ARM_CALL     | no       | 1      | BL  | BLX       | BL        |
| R_ARM_CALL     | no       | 0      | BLX | BL        | BLX       |
| R_ARM_THM_CALL | no       | 1      | BLX | BL        | BLX       |
| R_ARM_THM_CALL | no       | 0      | BL  | BLX       | BL        |

Differential Revision: https://reviews.llvm.org/D73542

Added: 
    lld/test/ELF/arm-thumb-interwork-abs.s

Modified: 
    lld/ELF/Arch/ARM.cpp
    lld/test/ELF/arm-thumb-interwork-notfunc.s
    lld/test/ELF/arm-thumb-undefined-weak.s
    lld/test/ELF/arm-undefined-weak.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index 7f18effa096f..25caf856b9e2 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -377,6 +377,28 @@ bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
   return distance <= range;
 }
 
+// Helper to produce message text when LLD detects that a CALL relocation to
+// a non STT_FUNC symbol that may result in incorrect interworking between ARM
+// or Thumb.
+static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) {
+  assert(!s.isFunc());
+  if (s.isSection()) {
+    // Section symbols must be defined and in a section. Users cannot change
+    // the type. Use the section name as getName() returns an empty string.
+    warn(getErrorLocation(loc) + "branch and link relocation: " +
+         toString(relt) + " to STT_SECTION symbol " +
+         cast<Defined>(s).section->name + " ; interworking not performed");
+  } else {
+    // Warn with hint on how to alter the symbol type.
+    warn(getErrorLocation(loc) + "branch and link relocation: " +
+         toString(relt) + " to non STT_FUNC symbol: " + s.getName() +
+         " interworking not performed; consider using directive '.type " +
+         s.getName() +
+         ", %function' to give symbol type STT_FUNC if"
+         " interworking between ARM and Thumb is required");
+  }
+}
+
 void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   switch (rel.type) {
   case R_ARM_ABS32:
@@ -402,11 +424,20 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     checkInt(loc, val, 31, rel);
     write32le(loc, (read32le(loc) & 0x80000000) | (val & ~0x80000000));
     break;
-  case R_ARM_CALL:
-    // R_ARM_CALL is used for BL and BLX instructions, depending on the
-    // value of bit 0 of Val, we must select a BL or BLX instruction
-    if (val & 1) {
-      // If bit 0 of Val is 1 the target is Thumb, we must select a BLX.
+  case R_ARM_CALL: {
+    // R_ARM_CALL is used for BL and BLX instructions, for symbols of type
+    // STT_FUNC we choose whether to write a BL or BLX depending on the
+    // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is
+    // not of type STT_FUNC then we must preserve the original instruction.
+    // PLT entries are always ARM state so we know we don't need to interwork.
+    assert(rel.sym); // R_ARM_CALL is always reached via relocate().
+    bool bit0Thumb = val & 1;
+    bool isBlx = (read32le(loc) & 0xfe000000) == 0xfa000000;
+    // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
+    // even when type not STT_FUNC.
+    if (!rel.sym->isFunc() && isBlx != bit0Thumb)
+      stateChangeWarning(loc, rel.type, *rel.sym);
+    if (rel.sym->isFunc() ? bit0Thumb : isBlx) {
       // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1'
       checkInt(loc, val, 26, rel);
       write32le(loc, 0xfa000000 |                    // opcode
@@ -414,11 +445,11 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
                          ((val >> 2) & 0x00ffffff)); // imm24
       break;
     }
-    if ((read32le(loc) & 0xfe000000) == 0xfa000000)
-      // BLX (always unconditional) instruction to an ARM Target, select an
-      // unconditional BL.
-      write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff));
+    // BLX (always unconditional) instruction to an ARM Target, select an
+    // unconditional BL.
+    write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff));
     // fall through as BL encoding is shared with B
+  }
     LLVM_FALLTHROUGH;
   case R_ARM_JUMP24:
   case R_ARM_PC24:
@@ -443,16 +474,28 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
                   ((val >> 5) & 0x2000) | // J1
                   ((val >> 1) & 0x07ff)); // imm11
     break;
-  case R_ARM_THM_CALL:
-    // R_ARM_THM_CALL is used for BL and BLX instructions, depending on the
-    // value of bit 0 of Val, we must select a BL or BLX instruction
-    if ((val & 1) == 0) {
-      // Ensure BLX destination is 4-byte aligned. As BLX instruction may
-      // only be two byte aligned. This must be done before overflow check
+  case R_ARM_THM_CALL: {
+    // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type
+    // STT_FUNC we choose whether to write a BL or BLX depending on the
+    // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is
+    // not of type STT_FUNC then we must preserve the original instruction.
+    // PLT entries are always ARM state so we know we need to interwork.
+    assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
+    bool bit0Thumb = val & 1;
+    bool isBlx = (read16le(loc + 2) & 0x1000) == 0;
+    // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
+    // even when type not STT_FUNC. PLT entries generated by LLD are always ARM.
+    if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb)
+      stateChangeWarning(loc, rel.type, *rel.sym);
+    if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) {
+      // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
+      // the BLX instruction may only be two byte aligned. This must be done
+      // before overflow check.
       val = alignTo(val, 4);
+      write16le(loc + 2, read16le(loc + 2) & ~0x1000);
+    } else {
+      write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | 1 << 12);
     }
-    // Bit 12 is 0 for BLX, 1 for BL
-    write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | (val & 1) << 12);
     if (!config->armJ1J2BranchEncoding) {
       // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
       // 
diff erent encoding rules and range due to J1 and J2 always being 1.
@@ -466,6 +509,7 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
                     ((val >> 1) & 0x07ff));    // imm11
       break;
     }
+  }
     // Fall through as rest of encoding is the same as B.W
     LLVM_FALLTHROUGH;
   case R_ARM_THM_JUMP24:

diff  --git a/lld/test/ELF/arm-thumb-interwork-abs.s b/lld/test/ELF/arm-thumb-interwork-abs.s
new file mode 100644
index 000000000000..85d13387a95a
--- /dev/null
+++ b/lld/test/ELF/arm-thumb-interwork-abs.s
@@ -0,0 +1,38 @@
+// REQUIRES: arm
+// RUN: llvm-mc --triple=armv7a-linux-gnueabihf -arm-add-build-attributes -filetype=obj -o %t.o %s
+// RUN: ld.lld %t.o --defsym sym=0x13001 -o %t 2>&1 | FileCheck %s --check-prefix=WARN
+// RUN: llvm-objdump --no-show-raw-insn -d %t | FileCheck %s
+
+/// A similar test to arm-thumb-interwork-notfunc.s this time exercising the
+/// case where a symbol does not have type STT_FUNC but it does have the bottom
+/// bit set. We use absolute symbols to represent assembler labels as the
+/// minimum alignment of a label in code is 2.
+.syntax unified
+.global sym
+.global _start
+.type _start, %function
+.text
+.balign 0x1000
+_start:
+arm_caller:
+.arm
+  b sym
+  bl sym
+// WARN: branch and link relocation: R_ARM_CALL to non STT_FUNC symbol: sym interworking not performed; consider using directive '.type sym, %function' to give symbol type STT_FUNC if interworking between ARM and Thumb is required
+  blx sym
+.thumb
+thumb_caller:
+  b sym
+  bl sym
+  blx sym
+// WARN: branch and link relocation: R_ARM_THM_CALL to non STT_FUNC symbol: sym interworking not performed; consider using directive '.type sym, %function' to give symbol type STT_FUNC if interworking between ARM and Thumb is required
+
+// CHECK: 00012000 arm_caller:
+// CHECK-NEXT:    12000: b   #4088
+// CHECK-NEXT:    12004: bl  #4084
+// CHECK-NEXT:    12008: blx #4080
+
+// CHECK: 0001200c thumb_caller:
+// CHECK-NEXT:    1200c: b.w     #4080
+// CHECK-NEXT:    12010: bl      #4076
+// CHECK-NEXT:    12014: blx     #4076

diff  --git a/lld/test/ELF/arm-thumb-interwork-notfunc.s b/lld/test/ELF/arm-thumb-interwork-notfunc.s
index 4f29fdf541fb..40f3afa28b1b 100644
--- a/lld/test/ELF/arm-thumb-interwork-notfunc.s
+++ b/lld/test/ELF/arm-thumb-interwork-notfunc.s
@@ -1,6 +1,7 @@
 // REQUIRES: arm
-// RUN: llvm-mc --triple=armv7a-linux-gnueabihf -arm-add-build-attributes -filetype=obj -o %t.o %s
-// RUN: ld.lld %t.o -o %t
+// RUN: llvm-mc -g --triple=armv7a-linux-gnueabihf -arm-add-build-attributes -filetype=obj -o %t.o %s
+// RUN: ld.lld %t.o --no-threads -o %t 2>&1
+// RUN: ld.lld %t.o --no-threads -o %t 2>&1 | FileCheck %s --check-prefix=WARN
 // RUN: llvm-objdump --no-show-raw-insn -d %t | FileCheck %s
 
 .syntax unified
@@ -23,7 +24,11 @@ thumb_func_with_explicit_notype:
 /// All the symbols that are targets of the branch relocations do not have
 /// type STT_FUNC. LLD should not insert interworking thunks as non STT_FUNC
 /// symbols have no state information, the ABI assumes the user has manually
-/// done the interworking.
+/// done the interworking. For the BL and BLX instructions LLD should
+/// preserve the original instruction instead of writing out the correct one
+/// for the assumed state at the target.
+/// LLD will warn for the BL and BLX cases where the behavior has changed
+/// from LLD 10.0
 .section .arm_caller, "ax", %progbits
 .balign 4
 .arm
@@ -35,10 +40,30 @@ _start:
  b .thumb_target
  b thumb_func_with_notype
  b thumb_func_with_explicit_notype
+ bl .arm_target
+ bl arm_func_with_notype
+ bl arm_func_with_explicit_notype
+ bl .thumb_target
+ bl thumb_func_with_notype
+ bl thumb_func_with_explicit_notype
+// WARN: {{.*}}.s:[[# @LINE+1]]:(.arm_caller+0x30): branch and link relocation: R_ARM_CALL to STT_SECTION symbol .arm_target ; interworking not performed
+ blx .arm_target
+// WARN: {{.*}}.s:[[# @LINE+1]]:(.arm_caller+0x34): branch and link relocation: R_ARM_CALL to non STT_FUNC symbol: arm_func_with_notype interworking not performed; consider using directive '.type arm_func_with_notype, %function' to give symbol type STT_FUNC if interworking between ARM and Thumb is required
+ blx arm_func_with_notype
+// WARN: {{.*}}.s:[[# @LINE+1]]:(.arm_caller+0x38): branch and link relocation: R_ARM_CALL to non STT_FUNC symbol: arm_func_with_explicit_notype interworking not performed; consider using directive '.type arm_func_with_explicit_notype, %function' to give symbol type STT_FUNC if interworking between ARM and Thumb is required
+ blx arm_func_with_explicit_notype
+// WARN: {{.*}}.s:[[# @LINE+1]]:(.arm_caller+0x3C): branch and link relocation: R_ARM_CALL to STT_SECTION symbol .thumb_target ; interworking not performed
+ blx .thumb_target
+// WARN: {{.*}}.s:[[# @LINE+1]]:(.arm_caller+0x40): branch and link relocation: R_ARM_CALL to non STT_FUNC symbol: thumb_func_with_notype interworking not performed; consider using directive '.type thumb_func_with_notype, %function' to give symbol type STT_FUNC if interworking between ARM and Thumb is required
+ blx thumb_func_with_notype
+// WARN: {{.*}}.s:[[# @LINE+1]]:(.arm_caller+0x44): branch and link relocation: R_ARM_CALL to non STT_FUNC symbol: thumb_func_with_explicit_notype interworking not performed; consider using directive '.type thumb_func_with_explicit_notype, %function' to give symbol type STT_FUNC if interworking between ARM and Thumb is required
+ blx thumb_func_with_explicit_notype
 
  .section .thumb_caller, "ax", %progbits
  .thumb
  .balign 4
+ .global thumb_caller
+thumb_caller:
  b.w .arm_target
  b.w arm_func_with_notype
  b.w arm_func_with_explicit_notype
@@ -51,6 +76,24 @@ _start:
  beq.w .thumb_target
  beq.w thumb_func_with_notype
  beq.w thumb_func_with_explicit_notype
+// WARN: {{.*}}.s:[[# @LINE+1]]:(.thumb_caller+0x30): branch and link relocation: R_ARM_THM_CALL to STT_SECTION symbol .arm_target ; interworking not performed
+ bl .arm_target
+// WARN: {{.*}}.s:[[# @LINE+1]]:(.thumb_caller+0x34): branch and link relocation: R_ARM_THM_CALL to non STT_FUNC symbol: arm_func_with_notype interworking not performed; consider using directive '.type arm_func_with_notype, %function' to give symbol type STT_FUNC if interworking between ARM and Thumb is required
+ bl arm_func_with_notype
+ // WARN: {{.*}}.s:[[# @LINE+1]]:(.thumb_caller+0x38): branch and link relocation: R_ARM_THM_CALL to non STT_FUNC symbol: arm_func_with_explicit_notype interworking not performed; consider using directive '.type arm_func_with_explicit_notype, %function' to give symbol type STT_FUNC if interworking between ARM and Thumb is required
+ bl arm_func_with_explicit_notype
+// WARN: {{.*}}.s:[[# @LINE+1]]:(.thumb_caller+0x3C): branch and link relocation: R_ARM_THM_CALL to STT_SECTION symbol .thumb_target ; interworking not performed
+ bl .thumb_target
+// WARN: {{.*}}.s:[[# @LINE+1]]:(.thumb_caller+0x40): branch and link relocation: R_ARM_THM_CALL to non STT_FUNC symbol: thumb_func_with_notype interworking not performed; consider using directive '.type thumb_func_with_notype, %function' to give symbol type STT_FUNC if interworking between ARM and Thumb is required
+ bl thumb_func_with_notype
+// {{.*}}.s:[[# @LINE+1]]:(.thumb_caller+0x44): branch and link relocation: R_ARM_THM_CALL to non STT_FUNC symbol: thumb_func_with_explicit_notype interworking not performed; consider using directive '.type thumb_func_with_explicit_notype, %function' to give symbol type STT_FUNC if interworking between ARM and Thumb is required
+ bl thumb_func_with_explicit_notype
+ blx .arm_target
+ blx arm_func_with_notype
+ blx arm_func_with_explicit_notype
+ blx .thumb_target
+ blx thumb_func_with_notype
+ blx thumb_func_with_explicit_notype
 
 // CHECK: 00012008 _start:
 // CHECK-NEXT: 12008: b       #-16
@@ -59,15 +102,41 @@ _start:
 // CHECK-NEXT: 12014: b       #-24
 // CHECK-NEXT: 12018: b       #-28
 // CHECK-NEXT: 1201c: b       #-32
-// CHECK:      12020: b.w     #-36
-// CHECK-NEXT: 12024: b.w     #-40
-// CHECK-NEXT: 12028: b.w     #-44
-// CHECK-NEXT: 1202c: b.w     #-44
-// CHECK-NEXT: 12030: b.w     #-48
-// CHECK-NEXT: 12034: b.w     #-52
-// CHECK-NEXT: 12038: beq.w   #-60
-// CHECK-NEXT: 1203c: beq.w   #-64
-// CHECK-NEXT: 12040: beq.w   #-68
-// CHECK-NEXT: 12044: beq.w   #-68
-// CHECK-NEXT: 12048: beq.w   #-72
-// CHECK-NEXT: 1204c: beq.w   #-76
+// CHECK-NEXT: 12020: bl      #-40
+// CHECK-NEXT: 12024: bl      #-44
+// CHECK-NEXT: 12028: bl      #-48
+// CHECK-NEXT: 1202c: bl      #-48
+// CHECK-NEXT: 12030: bl      #-52
+// CHECK-NEXT: 12034: bl      #-56
+// CHECK-NEXT: 12038: blx     #-64
+// CHECK-NEXT: 1203c: blx     #-68
+// CHECK-NEXT: 12040: blx     #-72
+// CHECK-NEXT: 12044: blx     #-72
+// CHECK-NEXT: 12048: blx     #-76
+// CHECK-NEXT: 1204c: blx     #-80
+
+// CHECK: 00012050 thumb_caller:
+// CHECK-NEXT: 12050: b.w     #-84
+// CHECK-NEXT: 12054: b.w     #-88
+// CHECK-NEXT: 12058: b.w     #-92
+// CHECK-NEXT: 1205c: b.w     #-92
+// CHECK-NEXT: 12060: b.w     #-96
+// CHECK-NEXT: 12064: b.w     #-100
+// CHECK-NEXT: 12068: beq.w   #-108
+// CHECK-NEXT: 1206c: beq.w   #-112
+// CHECK-NEXT: 12070: beq.w   #-116
+// CHECK-NEXT: 12074: beq.w   #-116
+// CHECK-NEXT: 12078: beq.w   #-120
+// CHECK-NEXT: 1207c: beq.w   #-124
+// CHECK-NEXT: 12080: bl      #-132
+// CHECK-NEXT: 12084: bl      #-136
+// CHECK-NEXT: 12088: bl      #-140
+// CHECK-NEXT: 1208c: bl      #-140
+// CHECK-NEXT: 12090: bl      #-144
+// CHECK-NEXT: 12094: bl      #-148
+// CHECK-NEXT: 12098: blx     #-156
+// CHECK-NEXT: 1209c: blx     #-160
+// CHECK-NEXT: 120a0: blx     #-164
+// CHECK-NEXT: 120a4: blx     #-164
+// CHECK-NEXT: 120a8: blx     #-168
+// CHECK-NEXT: 120ac: blx     #-172

diff  --git a/lld/test/ELF/arm-thumb-undefined-weak.s b/lld/test/ELF/arm-thumb-undefined-weak.s
index b6812b8cc91a..82069a7d9db5 100644
--- a/lld/test/ELF/arm-thumb-undefined-weak.s
+++ b/lld/test/ELF/arm-thumb-undefined-weak.s
@@ -10,6 +10,7 @@
  .syntax unified
 
  .weak target
+ .type target, %function
 
  .text
  .global _start

diff  --git a/lld/test/ELF/arm-undefined-weak.s b/lld/test/ELF/arm-undefined-weak.s
index 2d4726358095..c7e3f5f7694d 100644
--- a/lld/test/ELF/arm-undefined-weak.s
+++ b/lld/test/ELF/arm-undefined-weak.s
@@ -12,6 +12,7 @@
  .syntax unified
 
  .weak target
+ .type target, %function
 
  .text
  .global _start


        


More information about the llvm-commits mailing list