[lld] 0b4a047 - [LLD][ELF][ARM] Do not substitute BL/BLX for non STT_FUNC symbols.

Peter Smith via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 29 03:42:43 PST 2020


Author: Peter Smith
Date: 2020-01-29T11:42:25Z
New Revision: 0b4a047bfbd11fe1f5abda8da0e2391c1918162a

URL: https://github.com/llvm/llvm-project/commit/0b4a047bfbd11fe1f5abda8da0e2391c1918162a
DIFF: https://github.com/llvm/llvm-project/commit/0b4a047bfbd11fe1f5abda8da0e2391c1918162a.diff

LOG: [LLD][ELF][ARM] Do not substitute BL/BLX for non STT_FUNC symbols.

D73474 disabled the generation of interworking thunks for branch
relocations to non STT_FUNC symbols. This patch handles the case of BL and
BLX instructions to non STT_FUNC symbols. LLD would normally look at the
state of the caller and the callee and write a BL if the states are the
same and a BLX if the states are different.

This patch disables BL/BLX substitution when the destination symbol does
not have type STT_FUNC. This brings our behavior in line with GNU ld which
may prevent difficult to diagnose runtime errors when switching to lld.

Differential Revision: https://reviews.llvm.org/D73542

Added: 
    

Modified: 
    lld/ELF/Arch/ARM.cpp
    lld/test/ELF/arm-thumb-interwork-notfunc.s
    lld/test/ELF/arm-thumb-undefined-weak.s
    lld/test/ELF/arm-undefined-weak.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index 7f18effa096f..9101ad7652a6 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -402,11 +402,15 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     checkInt(loc, val, 31, rel);
     write32le(loc, (read32le(loc) & 0x80000000) | (val & ~0x80000000));
     break;
-  case R_ARM_CALL:
-    // R_ARM_CALL is used for BL and BLX instructions, depending on the
-    // value of bit 0 of Val, we must select a BL or BLX instruction
-    if (val & 1) {
-      // If bit 0 of Val is 1 the target is Thumb, we must select a BLX.
+  case R_ARM_CALL: {
+    // R_ARM_CALL is used for BL and BLX instructions, for symbols of type
+    // STT_FUNC we choose whether to write a BL or BLX depending on the
+    // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is
+    // not of type STT_FUNC then we must preserve the original instruction.
+    // PLT entries are always ARM state so we know we don't need to interwork.
+    bool isBlx = (read32le(loc) & 0xfe000000) == 0xfa000000;
+    bool interwork = rel.sym && rel.sym->isFunc() && rel.type != R_PLT_PC;
+    if (interwork ? val & 1 : isBlx) {
       // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1'
       checkInt(loc, val, 26, rel);
       write32le(loc, 0xfa000000 |                    // opcode
@@ -414,11 +418,11 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
                          ((val >> 2) & 0x00ffffff)); // imm24
       break;
     }
-    if ((read32le(loc) & 0xfe000000) == 0xfa000000)
-      // BLX (always unconditional) instruction to an ARM Target, select an
-      // unconditional BL.
-      write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff));
+    // BLX (always unconditional) instruction to an ARM Target, select an
+    // unconditional BL.
+    write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff));
     // fall through as BL encoding is shared with B
+  }
     LLVM_FALLTHROUGH;
   case R_ARM_JUMP24:
   case R_ARM_PC24:
@@ -443,16 +447,23 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
                   ((val >> 5) & 0x2000) | // J1
                   ((val >> 1) & 0x07ff)); // imm11
     break;
-  case R_ARM_THM_CALL:
-    // R_ARM_THM_CALL is used for BL and BLX instructions, depending on the
-    // value of bit 0 of Val, we must select a BL or BLX instruction
-    if ((val & 1) == 0) {
-      // Ensure BLX destination is 4-byte aligned. As BLX instruction may
-      // only be two byte aligned. This must be done before overflow check
+  case R_ARM_THM_CALL: {
+    // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type
+    // STT_FUNC we choose whether to write a BL or BLX depending on the
+    // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is
+    // not of type STT_FUNC then we must preserve the original instruction.
+    // PLT entries are always ARM state so we know we need to interwork.
+    bool isBlx = (read16le(loc + 2) & 0x1000) == 0;
+    bool interwork = (rel.sym && rel.sym->isFunc()) || rel.type == R_PLT_PC;
+    if (interwork ? (val & 1) == 0 : isBlx) {
+      // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
+      // the BLX instruction may only be two byte aligned. This must be done
+      // before overflow check.
       val = alignTo(val, 4);
+      write16le(loc + 2, read16le(loc + 2) & ~0x1000);
+    } else {
+      write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | 1 << 12);
     }
-    // Bit 12 is 0 for BLX, 1 for BL
-    write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | (val & 1) << 12);
     if (!config->armJ1J2BranchEncoding) {
       // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
       // 
diff erent encoding rules and range due to J1 and J2 always being 1.
@@ -466,6 +477,7 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
                     ((val >> 1) & 0x07ff));    // imm11
       break;
     }
+  }
     // Fall through as rest of encoding is the same as B.W
     LLVM_FALLTHROUGH;
   case R_ARM_THM_JUMP24:

diff  --git a/lld/test/ELF/arm-thumb-interwork-notfunc.s b/lld/test/ELF/arm-thumb-interwork-notfunc.s
index 4f29fdf541fb..49a55f20d401 100644
--- a/lld/test/ELF/arm-thumb-interwork-notfunc.s
+++ b/lld/test/ELF/arm-thumb-interwork-notfunc.s
@@ -23,7 +23,9 @@ thumb_func_with_explicit_notype:
 /// All the symbols that are targets of the branch relocations do not have
 /// type STT_FUNC. LLD should not insert interworking thunks as non STT_FUNC
 /// symbols have no state information, the ABI assumes the user has manually
-/// done the interworking.
+/// done the interworking. For the BL and BLX instructions LLD should
+/// preserve the original instruction instead of writing out the correct one
+/// for the assumed state at the target.
 .section .arm_caller, "ax", %progbits
 .balign 4
 .arm
@@ -35,10 +37,24 @@ _start:
  b .thumb_target
  b thumb_func_with_notype
  b thumb_func_with_explicit_notype
+ bl .arm_target
+ bl arm_func_with_notype
+ bl arm_func_with_explicit_notype
+ bl .thumb_target
+ bl thumb_func_with_notype
+ bl thumb_func_with_explicit_notype
+ blx .arm_target
+ blx arm_func_with_notype
+ blx arm_func_with_explicit_notype
+ blx .thumb_target
+ blx thumb_func_with_notype
+ blx thumb_func_with_explicit_notype
 
  .section .thumb_caller, "ax", %progbits
  .thumb
  .balign 4
+ .global thumb_caller
+thumb_caller:
  b.w .arm_target
  b.w arm_func_with_notype
  b.w arm_func_with_explicit_notype
@@ -51,6 +67,18 @@ _start:
  beq.w .thumb_target
  beq.w thumb_func_with_notype
  beq.w thumb_func_with_explicit_notype
+ bl .arm_target
+ bl arm_func_with_notype
+ bl arm_func_with_explicit_notype
+ bl .thumb_target
+ bl thumb_func_with_notype
+ bl thumb_func_with_explicit_notype
+ blx .arm_target
+ blx arm_func_with_notype
+ blx arm_func_with_explicit_notype
+ blx .thumb_target
+ blx thumb_func_with_notype
+ blx thumb_func_with_explicit_notype
 
 // CHECK: 00012008 _start:
 // CHECK-NEXT: 12008: b       #-16
@@ -59,15 +87,41 @@ _start:
 // CHECK-NEXT: 12014: b       #-24
 // CHECK-NEXT: 12018: b       #-28
 // CHECK-NEXT: 1201c: b       #-32
-// CHECK:      12020: b.w     #-36
-// CHECK-NEXT: 12024: b.w     #-40
-// CHECK-NEXT: 12028: b.w     #-44
-// CHECK-NEXT: 1202c: b.w     #-44
-// CHECK-NEXT: 12030: b.w     #-48
-// CHECK-NEXT: 12034: b.w     #-52
-// CHECK-NEXT: 12038: beq.w   #-60
-// CHECK-NEXT: 1203c: beq.w   #-64
-// CHECK-NEXT: 12040: beq.w   #-68
-// CHECK-NEXT: 12044: beq.w   #-68
-// CHECK-NEXT: 12048: beq.w   #-72
-// CHECK-NEXT: 1204c: beq.w   #-76
+// CHECK-NEXT: 12020: bl      #-40
+// CHECK-NEXT: 12024: bl      #-44
+// CHECK-NEXT: 12028: bl      #-48
+// CHECK-NEXT: 1202c: bl      #-48
+// CHECK-NEXT: 12030: bl      #-52
+// CHECK-NEXT: 12034: bl      #-56
+// CHECK-NEXT: 12038: blx     #-64
+// CHECK-NEXT: 1203c: blx     #-68
+// CHECK-NEXT: 12040: blx     #-72
+// CHECK-NEXT: 12044: blx     #-72
+// CHECK-NEXT: 12048: blx     #-76
+// CHECK-NEXT: 1204c: blx     #-80
+
+// CHECK: 00012050 thumb_caller:
+// CHECK-NEXT: 12050: b.w     #-84
+// CHECK-NEXT: 12054: b.w     #-88
+// CHECK-NEXT: 12058: b.w     #-92
+// CHECK-NEXT: 1205c: b.w     #-92
+// CHECK-NEXT: 12060: b.w     #-96
+// CHECK-NEXT: 12064: b.w     #-100
+// CHECK-NEXT: 12068: beq.w   #-108
+// CHECK-NEXT: 1206c: beq.w   #-112
+// CHECK-NEXT: 12070: beq.w   #-116
+// CHECK-NEXT: 12074: beq.w   #-116
+// CHECK-NEXT: 12078: beq.w   #-120
+// CHECK-NEXT: 1207c: beq.w   #-124
+// CHECK-NEXT: 12080: bl      #-132
+// CHECK-NEXT: 12084: bl      #-136
+// CHECK-NEXT: 12088: bl      #-140
+// CHECK-NEXT: 1208c: bl      #-140
+// CHECK-NEXT: 12090: bl      #-144
+// CHECK-NEXT: 12094: bl      #-148
+// CHECK-NEXT: 12098: blx     #-156
+// CHECK-NEXT: 1209c: blx     #-160
+// CHECK-NEXT: 120a0: blx     #-164
+// CHECK-NEXT: 120a4: blx     #-164
+// CHECK-NEXT: 120a8: blx     #-168
+// CHECK-NEXT: 120ac: blx     #-172

diff  --git a/lld/test/ELF/arm-thumb-undefined-weak.s b/lld/test/ELF/arm-thumb-undefined-weak.s
index b6812b8cc91a..82069a7d9db5 100644
--- a/lld/test/ELF/arm-thumb-undefined-weak.s
+++ b/lld/test/ELF/arm-thumb-undefined-weak.s
@@ -10,6 +10,7 @@
  .syntax unified
 
  .weak target
+ .type target, %function
 
  .text
  .global _start

diff  --git a/lld/test/ELF/arm-undefined-weak.s b/lld/test/ELF/arm-undefined-weak.s
index 2d4726358095..c7e3f5f7694d 100644
--- a/lld/test/ELF/arm-undefined-weak.s
+++ b/lld/test/ELF/arm-undefined-weak.s
@@ -12,6 +12,7 @@
  .syntax unified
 
  .weak target
+ .type target, %function
 
  .text
  .global _start


        


More information about the llvm-commits mailing list