[lld] 7832769 - Revert "[lld] Support thumb PLTs" (#93631)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 28 18:46:26 PDT 2024
Author: Mehdi Amini
Date: 2024-05-28T19:46:23-06:00
New Revision: 7832769d329ead264aff238c06dce086b3a74922
URL: https://github.com/llvm/llvm-project/commit/7832769d329ead264aff238c06dce086b3a74922
DIFF: https://github.com/llvm/llvm-project/commit/7832769d329ead264aff238c06dce086b3a74922.diff
LOG: Revert "[lld] Support thumb PLTs" (#93631)
Reverts llvm/llvm-project#86223
windows pre-merge is broken.
Added:
Modified:
lld/ELF/Arch/ARM.cpp
lld/ELF/Config.h
lld/ELF/InputFiles.cpp
Removed:
lld/test/ELF/armv8-thumb-plt-reloc.s
################################################################################
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index 3e0efe540e1bf..687f9499009d5 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -231,71 +231,36 @@ static void writePltHeaderLong(uint8_t *buf) {
// The default PLT header requires the .got.plt to be within 128 Mb of the
// .plt in the positive direction.
void ARM::writePltHeader(uint8_t *buf) const {
- if (config->armThumbPLTs) {
- // The instruction sequence for thumb:
- //
- // 0: b500 push {lr}
- // 2: f8df e008 ldr.w lr, [pc, #0x8] @ 0xe <func+0xe>
- // 6: 44fe add lr, pc
- // 8: f85e ff08 ldr pc, [lr, #8]!
- // e: .word .got.plt - .plt - 16
- //
- // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from
- // `pc` in the add instruction and 8 bytes for the `lr` adjustment.
- //
- uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16;
- assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
- write16(buf + 0, 0xb500);
- // Split into two halves to support endianness correctly.
- write16(buf + 2, 0xf8df);
- write16(buf + 4, 0xe008);
- write16(buf + 6, 0x44fe);
- // Split into two halves to support endianness correctly.
- write16(buf + 8, 0xf85e);
- write16(buf + 10, 0xff08);
- write32(buf + 12, offset);
-
- memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
- memcpy(buf + 20, trapInstr.data(), 4);
- memcpy(buf + 24, trapInstr.data(), 4);
- memcpy(buf + 28, trapInstr.data(), 4);
- } else {
- // Use a similar sequence to that in writePlt(), the difference is the
- // calling conventions mean we use lr instead of ip. The PLT entry is
- // responsible for saving lr on the stack, the dynamic loader is responsible
- // for reloading it.
- const uint32_t pltData[] = {
- 0xe52de004, // L1: str lr, [sp,#-4]!
- 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
- 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
- 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
- };
-
- uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
- if (!llvm::isUInt<27>(offset)) {
- // We cannot encode the Offset, use the long form.
- writePltHeaderLong(buf);
- return;
- }
- write32(buf + 0, pltData[0]);
- write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
- write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
- write32(buf + 12, pltData[3] | (offset & 0xfff));
- memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
- memcpy(buf + 20, trapInstr.data(), 4);
- memcpy(buf + 24, trapInstr.data(), 4);
- memcpy(buf + 28, trapInstr.data(), 4);
+ // Use a similar sequence to that in writePlt(), the difference is the calling
+ // conventions mean we use lr instead of ip. The PLT entry is responsible for
+ // saving lr on the stack, the dynamic loader is responsible for reloading
+ // it.
+ const uint32_t pltData[] = {
+ 0xe52de004, // L1: str lr, [sp,#-4]!
+ 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
+ 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
+ 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
+ };
+
+ uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
+ if (!llvm::isUInt<27>(offset)) {
+ // We cannot encode the Offset, use the long form.
+ writePltHeaderLong(buf);
+ return;
}
+ write32(buf + 0, pltData[0]);
+ write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
+ write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
+ write32(buf + 12, pltData[3] | (offset & 0xfff));
+ memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
+ memcpy(buf + 20, trapInstr.data(), 4);
+ memcpy(buf + 24, trapInstr.data(), 4);
+ memcpy(buf + 28, trapInstr.data(), 4);
}
void ARM::addPltHeaderSymbols(InputSection &isec) const {
- if (config->armThumbPLTs) {
- addSyntheticLocal("$t", STT_NOTYPE, 0, 0, isec);
- addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec);
- } else {
- addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
- addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
- }
+ addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
+ addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
}
// Long form PLT entries that do not have any restrictions on the displacement
@@ -314,65 +279,32 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr,
// .plt in the positive direction.
void ARM::writePlt(uint8_t *buf, const Symbol &sym,
uint64_t pltEntryAddr) const {
+ // The PLT entry is similar to the example given in Appendix A of ELF for
+ // the Arm Architecture. Instead of using the Group Relocations to find the
+ // optimal rotation for the 8-bit immediate used in the add instructions we
+ // hard code the most compact rotations for simplicity. This saves a load
+ // instruction over the long plt sequences.
+ const uint32_t pltData[] = {
+ 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8
+ 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8
+ 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
+ };
- if (!config->armThumbPLTs) {
- uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
-
- // The PLT entry is similar to the example given in Appendix A of ELF for
- // the Arm Architecture. Instead of using the Group Relocations to find the
- // optimal rotation for the 8-bit immediate used in the add instructions we
- // hard code the most compact rotations for simplicity. This saves a load
- // instruction over the long plt sequences.
- const uint32_t pltData[] = {
- 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8
- 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8
- 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
- };
- if (!llvm::isUInt<27>(offset)) {
- // We cannot encode the Offset, use the long form.
- writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
- return;
- }
- write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
- write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
- write32(buf + 8, pltData[2] | (offset & 0xfff));
- memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
- } else {
- uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12;
- assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
-
- // A PLT entry will be:
- //
- // movw ip, #<lower 16 bits>
- // movt ip, #<upper 16 bits>
- // add ip, pc
- // L1: ldr.w pc, [ip]
- // b L1
- //
- // where ip = r12 = 0xc
-
- // movw ip, #<lower 16 bits>
- write16(buf + 2, 0x0c00); // use `ip`
- relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset);
-
- // movt ip, #<upper 16 bits>
- write16(buf + 6, 0x0c00); // use `ip`
- relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset);
-
- write16(buf + 8, 0x44fc); // add ip, pc
- write16(buf + 10, 0xf8dc); // ldr.w pc, [ip] (bottom half)
- write16(buf + 12, 0xf000); // ldr.w pc, [ip] (upper half)
- write16(buf + 14, 0xe7fc); // Branch to previous instruction
+ uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
+ if (!llvm::isUInt<27>(offset)) {
+ // We cannot encode the Offset, use the long form.
+ writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
+ return;
}
+ write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
+ write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
+ write32(buf + 8, pltData[2] | (offset & 0xfff));
+ memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
}
void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
- if (config->armThumbPLTs) {
- addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec);
- } else {
- addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
- addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
- }
+ addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
+ addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
}
bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
@@ -393,8 +325,6 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
case R_ARM_JUMP24:
// Source is ARM, all PLT entries are ARM so no interworking required.
// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).
- assert(!config->armThumbPLTs &&
- "If the source is ARM, we should not need Thumb PLTs");
if (s.isFunc() && expr == R_PC && (s.getVA() & 1))
return true;
[[fallthrough]];
@@ -405,9 +335,9 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
}
case R_ARM_THM_JUMP19:
case R_ARM_THM_JUMP24:
- // Source is Thumb, when all PLT entries are ARM interworking is required.
+ // Source is Thumb, all PLT entries are ARM so interworking is required.
// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
- if ((expr == R_PLT_PC && !config->armThumbPLTs) || (s.isFunc() && (s.getVA() & 1) == 0))
+ if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0))
return true;
[[fallthrough]];
case R_ARM_THM_CALL: {
@@ -617,6 +547,7 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
// STT_FUNC we choose whether to write a BL or BLX depending on the
// value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is
// not of type STT_FUNC then we must preserve the original instruction.
+ // PLT entries are always ARM state so we know we don't need to interwork.
assert(rel.sym); // R_ARM_CALL is always reached via relocate().
bool bit0Thumb = val & 1;
bool isBlx = (read32(loc) & 0xfe000000) == 0xfa000000;
@@ -675,13 +606,12 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
// PLT entries are always ARM state so we know we need to interwork.
assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
bool bit0Thumb = val & 1;
- bool useThumb = bit0Thumb || config->armThumbPLTs;
bool isBlx = (read16(loc + 2) & 0x1000) == 0;
// lld 10.0 and before always used bit0Thumb when deciding to write a BLX
- // even when type not STT_FUNC.
- if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb)
+ // even when type not STT_FUNC. PLT entries generated by LLD are always ARM.
+ if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb)
stateChangeWarning(loc, rel.type, *rel.sym);
- if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) {
+ if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) {
// We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
// the BLX instruction may only be two byte aligned. This must be done
// before overflow check.
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 883c4a2f84294..f0dfe7f377de0 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -217,7 +217,6 @@ struct Config {
bool allowMultipleDefinition;
bool fatLTOObjects;
bool androidPackDynRelocs = false;
- bool armThumbPLTs = false;
bool armHasBlx = false;
bool armHasMovtMovw = false;
bool armJ1J2BranchEncoding = false;
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index d760dddcf5ec5..1f496026d3ae2 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -194,18 +194,6 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) {
if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base &&
profile == ARMBuildAttrs::MicroControllerProfile)
config->armCMSESupport = true;
-
- // The thumb PLT entries require Thumb2 which can be used on multiple archs.
- // For now, let's limit it to ones where ARM isn't available and we know have
- // Thumb2.
- std::optional<unsigned> armISA =
- attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use);
- std::optional<unsigned> thumb =
- attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use);
- bool noArmISA = !armISA || *armISA == ARMBuildAttrs::Not_Allowed;
- bool hasThumb2 = thumb && *thumb >= ARMBuildAttrs::AllowThumb32;
- if (noArmISA && hasThumb2)
- config->armThumbPLTs = true;
}
InputFile::InputFile(Kind k, MemoryBufferRef m)
diff --git a/lld/test/ELF/armv8-thumb-plt-reloc.s b/lld/test/ELF/armv8-thumb-plt-reloc.s
deleted file mode 100644
index 47cd5c1b741ee..0000000000000
--- a/lld/test/ELF/armv8-thumb-plt-reloc.s
+++ /dev/null
@@ -1,126 +0,0 @@
-// REQUIRES: arm
-// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumb --mcpu=cortex-m33 %p/Inputs/arm-plt-reloc.s -o %t1
-// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumb --mcpu=cortex-m33 %s -o %t2
-// RUN: ld.lld %t1 %t2 -o %t
-// RUN: llvm-objdump --no-print-imm-hex -d %t | FileCheck %s
-// RUN: ld.lld -shared %t1 %t2 -o %t.so
-// RUN: llvm-objdump --no-print-imm-hex -d %t.so | FileCheck --check-prefix=DSO %s
-// RUN: llvm-readelf -S -r %t.so | FileCheck -check-prefix=DSOREL %s
-
-// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumbeb --mcpu=cortex-m33 %p/Inputs/arm-plt-reloc.s -o %t1.be
-// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumbeb --mcpu=cortex-m33 %s -o %t2.be
-// RUN: ld.lld %t1.be %t2.be -o %t.be
-// RUN: llvm-objdump --no-print-imm-hex -d %t.be | FileCheck %s
-// RUN: ld.lld -shared %t1.be %t2.be -o %t.so.be
-// RUN: llvm-objdump --no-print-imm-hex -d %t.so.be | FileCheck --check-prefix=DSO %s
-// RUN: llvm-readelf -S -r %t.so.be | FileCheck -check-prefix=DSOREL %s
-
-// RUN: ld.lld --be8 %t1.be %t2.be -o %t.be
-// RUN: llvm-objdump --no-print-imm-hex -d %t.be | FileCheck %s
-// RUN: ld.lld --be8 -shared %t1.be %t2.be -o %t.so.be
-// RUN: llvm-objdump --no-print-imm-hex -d %t.so.be | FileCheck --check-prefix=DSO %s
-// RUN: llvm-readelf -S -r %t.so.be | FileCheck -check-prefix=DSOREL %s
-
-/// Test PLT entry generation
- .text
- .align 2
- .globl _start
- .type _start,%function
-_start:
- bl func1
- bl func2
- bl func3
- b.w func1
- b.w func2
- b.w func3
- beq.w func1
- beq.w func2
- beq.w func3
-
-/// Executable, expect no PLT
-// CHECK: Disassembly of section .text:
-// CHECK-EMPTY:
-// CHECK-NEXT: <func1>:
-// CHECK-NEXT: bx lr
-// CHECK: <func2>:
-// CHECK-NEXT: bx lr
-// CHECK: <func3>:
-// CHECK-NEXT: bx lr
-// CHECK-NEXT: d4d4
-// CHECK: <_start>:
-// CHECK-NEXT: bl {{.*}} <func1>
-// CHECK-NEXT: bl {{.*}} <func2>
-// CHECK-NEXT: bl {{.*}} <func3>
-// CHECK-NEXT: b.w {{.*}} <func1>
-// CHECK-NEXT: b.w {{.*}} <func2>
-// CHECK-NEXT: b.w {{.*}} <func3>
-// CHECK-NEXT: beq.w {{.*}} <func1>
-// CHECK-NEXT: beq.w {{.*}} <func2>
-// CHECK-NEXT: beq.w {{.*}} <func3>
-
-// DSO: Disassembly of section .text:
-// DSO-EMPTY:
-// DSO-NEXT: <func1>:
-// DSO-NEXT: bx lr
-// DSO: <func2>:
-// DSO-NEXT: bx lr
-// DSO: <func3>:
-// DSO-NEXT: bx lr
-// DSO-NEXT: d4d4
-// DSO: <_start>:
-/// 0x10260 = PLT func1
-// DSO-NEXT: bl 0x10260
-/// 0x10270 = PLT func2
-// DSO-NEXT: bl 0x10270
-/// 0x10280 = PLT func3
-// DSO-NEXT: bl 0x10280
-/// 0x10260 = PLT func1
-// DSO-NEXT: b.w 0x10260
-/// 0x10270 = PLT func2
-// DSO-NEXT: b.w 0x10270
-/// 0x10280 = PLT func3
-// DSO-NEXT: b.w 0x10280
-/// 0x10260 = PLT func1
-// DSO-NEXT: beq.w 0x10260
-/// 0x10270 = PLT func2
-// DSO-NEXT: beq.w 0x10270
-/// 0x10280 = PLT func3
-// DSO-NEXT: beq.w 0x10280
-// DSO: Disassembly of section .plt:
-// DSO-EMPTY:
-// DSO-NEXT: 10240 <.plt>:
-// DSO-NEXT: push {lr}
-// DSO-NEXT: ldr.w lr, [pc, #8]
-// DSO-NEXT: add lr, pc
-// DSO-NEXT: ldr pc, [lr, #8]!
-/// 0x20098 = .got.plt (0x302D8) - pc (0x10238 = .plt + 8) - 8
-// DSO-NEXT: .word 0x00020098
-// DSO-NEXT: .word 0xd4d4d4d4
-// DSO-NEXT: .word 0xd4d4d4d4
-// DSO-NEXT: .word 0xd4d4d4d4
-// DSO-NEXT: .word 0xd4d4d4d4
-
-/// 136 + 2 << 16 + 0x1026c = 0x302f4 = got entry 1
-// DSO-NEXT: 10260: f240 0c88 movw r12, #136
-// DSO-NEXT: f2c0 0c02 movt r12, #2
-// DSO-NEXT: 44fc add r12, pc
-// DSO-NEXT: f8dc f000 ldr.w pc, [r12]
-// DSO-NEXT: e7fc b 0x1026a
-/// 124 + 2 << 16 + 0x1027c = 0x302f8 = got entry 2
-// DSO-NEXT: 10270: f240 0c7c movw r12, #124
-// DSO-NEXT: f2c0 0c02 movt r12, #2
-// DSO-NEXT: 44fc add r12, pc
-// DSO-NEXT: f8dc f000 ldr.w pc, [r12]
-// DSO-NEXT: e7fc b 0x1027a
-/// 112 + 2 << 16 + 0x1028c = 0x302fc = got entry 3
-// DSO-NEXT: 10280: f240 0c70 movw r12, #112
-// DSO-NEXT: f2c0 0c02 movt r12, #2
-// DSO-NEXT: 44fc add r12, pc
-// DSO-NEXT: f8dc f000 ldr.w pc, [r12]
-// DSO-NEXT: e7fc b 0x1028a
-
-// DSOREL: .got.plt PROGBITS 000302e8 {{.*}} 000018 00 WA 0 0 4
-// DSOREL: Relocation section '.rel.plt'
-// DSOREL: 000302f4 {{.*}} R_ARM_JUMP_SLOT {{.*}} func1
-// DSOREL: 000302f8 {{.*}} R_ARM_JUMP_SLOT {{.*}} func2
-// DSOREL: 000302fc {{.*}} R_ARM_JUMP_SLOT {{.*}} func3
More information about the llvm-commits
mailing list