[lld] e91447f - Fix lld crash using --fix-cortex-a53-843419 (#170495)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 9 03:08:13 PST 2026
Author: TarcĂsio Fischer
Date: 2026-01-09T11:08:09Z
New Revision: e91447f44d303222d9bfcbd404cd152966c01630
URL: https://github.com/llvm/llvm-project/commit/e91447f44d303222d9bfcbd404cd152966c01630
DIFF: https://github.com/llvm/llvm-project/commit/e91447f44d303222d9bfcbd404cd152966c01630.diff
LOG: Fix lld crash using --fix-cortex-a53-843419 (#170495)
Original crash was observed in Chromium, in [1]. The problem occurs in
elf::isAArch64BTILandingPad because it didn't handle synthetic sections,
which can have a nullptr as a buf, so it crashed while trying to read
that buf.
After fixing that, a second issue occurs: When the patched code grows
too
much, it gets far away from the short jump, and the current
implementation
assumes a R_AARCH64_JUMP26 will be enough.
This PR changes the implementation to:
(a) In isAArch64BTILandingPad, checks if a section is synthetic, and
assumes that it'll NOT contain a landing pad, avoiding the buffer check;
(b) Suppress the size rounding for thunks that preceeds section
(Making the situation less likely to happen);
(c) Reimplements the patch by using a R_AARCH64_ABS64 in case the
patched code is still far away.
[1] https://issues.chromium.org/issues/440019454
---------
Co-authored-by: Tarcisio Fischer <tarcisio.fischer at arm.com>
Added:
lld/test/ELF/aarch64-cortex-a53-843419-thunk-relocation-crash.s
Modified:
lld/ELF/AArch64ErrataFix.cpp
lld/ELF/AArch64ErrataFix.h
lld/ELF/Arch/AArch64.cpp
lld/ELF/Relocations.cpp
lld/ELF/Relocations.h
Removed:
################################################################################
diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp
index fe8869d237b4d..a2ab58f96b2b5 100644
--- a/lld/ELF/AArch64ErrataFix.cpp
+++ b/lld/ELF/AArch64ErrataFix.cpp
@@ -370,7 +370,8 @@ static uint64_t scanCortexA53Errata843419(InputSection *isec, uint64_t &off,
class elf::Patch843419Section final : public SyntheticSection {
public:
- Patch843419Section(Ctx &, InputSection *p, uint64_t off);
+ Patch843419Section(Ctx &, InputSection *p, uint64_t off,
+ Defined *patcheeCodeSym);
void writeTo(uint8_t *buf) override;
@@ -390,7 +391,8 @@ class elf::Patch843419Section final : public SyntheticSection {
Symbol *patchSym;
};
-Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off)
+Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off,
+ Defined *patcheeCodeSym)
: SyntheticSection(ctx, ".text.patch", SHT_PROGBITS,
SHF_ALLOC | SHF_EXECINSTR, 4),
patchee(p), patcheeOffset(off) {
@@ -399,6 +401,9 @@ Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off)
ctx, ctx.saver.save("__CortexA53843419_" + utohexstr(getLDSTAddr())),
STT_FUNC, 0, getSize(), *this);
addSyntheticLocal(ctx, ctx.saver.save("$x"), STT_NOTYPE, 0, 0, *this);
+ int64_t retToPatcheeSymOffset =
+ getLDSTAddr() - p->getVA(patcheeCodeSym->value) + 4;
+ addReloc({R_PC, R_AARCH64_JUMP26, 4, retToPatcheeSymOffset, patcheeCodeSym});
}
uint64_t Patch843419Section::getLDSTAddr() const {
@@ -410,13 +415,8 @@ void Patch843419Section::writeTo(uint8_t *buf) {
// patchee Section.
write32le(buf, read32le(patchee->content().begin() + patcheeOffset));
- // Apply any relocation transferred from the original patchee section.
+ // Apply relocations
ctx.target->relocateAlloc(*this, buf);
-
- // Return address is the next instruction after the one we have just copied.
- uint64_t s = getLDSTAddr() + 4;
- uint64_t p = patchSym->getVA(ctx) + 4;
- ctx.target->relocateNoSym(buf + 4, R_AARCH64_JUMP26, s - p);
}
void AArch64Err843419Patcher::init() {
@@ -443,11 +443,14 @@ void AArch64Err843419Patcher::init() {
auto *def = dyn_cast<Defined>(b);
if (!def)
continue;
- if (!isCodeMapSymbol(def) && !isDataMapSymbol(def))
+ if (!def->isSection() && !isCodeMapSymbol(def) && !isDataMapSymbol(def))
continue;
- if (auto *sec = dyn_cast_or_null<InputSection>(def->section))
- if (sec->flags & SHF_EXECINSTR)
- sectionMap[sec].push_back(def);
+ if (auto *sec = dyn_cast_or_null<InputSection>(def->section)) {
+ if (def->isSection())
+ sectionMap[sec].first = def;
+ else if (sec->flags & SHF_EXECINSTR)
+ sectionMap[sec].second.push_back(def);
+ }
}
}
// For each InputSection make sure the mapping symbols are in sorted in
@@ -455,7 +458,7 @@ void AArch64Err843419Patcher::init() {
// the same type. For example we must remove the redundant $d.1 from $x.0
// $d.0 $d.1 $x.1.
for (auto &kv : sectionMap) {
- std::vector<const Defined *> &mapSyms = kv.second;
+ auto &mapSyms = kv.second.second;
llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {
return a->value < b->value;
});
@@ -529,7 +532,8 @@ void AArch64Err843419Patcher::insertPatches(
// Patches that we need to insert.
static void implementPatch(Ctx &ctx, uint64_t adrpAddr, uint64_t patcheeOffset,
InputSection *isec,
- std::vector<Patch843419Section *> &patches) {
+ std::vector<Patch843419Section *> &patches,
+ Defined *patcheeCodeSym) {
// There may be a relocation at the same offset that we are patching. There
// are four cases that we need to consider.
// Case 1: R_AARCH64_JUMP26 branch relocation. We have already patched this
@@ -554,7 +558,7 @@ static void implementPatch(Ctx &ctx, uint64_t adrpAddr, uint64_t patcheeOffset,
Log(ctx) << "detected cortex-a53-843419 erratum sequence starting at " <<
utohexstr(adrpAddr) << " in unpatched output.";
- auto *ps = make<Patch843419Section>(ctx, isec, patcheeOffset);
+ auto *ps = make<Patch843419Section>(ctx, isec, patcheeOffset, patcheeCodeSym);
patches.push_back(ps);
auto makeRelToPatch = [](uint64_t offset, Symbol *patchSym) {
@@ -584,7 +588,11 @@ AArch64Err843419Patcher::patchInputSectionDescription(
// mapping symbols of the same type. Our range of executable instructions to
// scan is therefore [codeSym->value, dataSym->value) or [codeSym->value,
// section size).
- std::vector<const Defined *> &mapSyms = sectionMap[isec];
+ auto [it, inserted] = sectionMap.try_emplace(
+ isec, std::make_pair(nullptr, SmallVector<Defined *, 0>{}));
+ auto &[sectionSym, mapSyms] = it->second;
+ if (inserted || sectionSym == nullptr)
+ sectionSym = addSyntheticLocal(ctx, "", STT_SECTION, 0, 0, *isec);
auto codeSym = mapSyms.begin();
while (codeSym != mapSyms.end()) {
@@ -597,7 +605,8 @@ AArch64Err843419Patcher::patchInputSectionDescription(
uint64_t startAddr = isec->getVA(off);
if (uint64_t patcheeOffset =
scanCortexA53Errata843419(isec, off, limit))
- implementPatch(ctx, startAddr, patcheeOffset, isec, patches);
+ implementPatch(ctx, startAddr, patcheeOffset, isec, patches,
+ sectionSym);
}
if (dataSym == mapSyms.end())
break;
diff --git a/lld/ELF/AArch64ErrataFix.h b/lld/ELF/AArch64ErrataFix.h
index cab0b04336982..8e0d305eb9ccd 100644
--- a/lld/ELF/AArch64ErrataFix.h
+++ b/lld/ELF/AArch64ErrataFix.h
@@ -36,10 +36,13 @@ class AArch64Err843419Patcher {
void init();
Ctx &ctx;
- // A cache of the mapping symbols defined by the InputSection sorted in order
- // of ascending value with redundant symbols removed. These describe
- // the ranges of code and data in an executable InputSection.
- llvm::DenseMap<InputSection *, std::vector<const Defined *>> sectionMap;
+ // A cache mapping InputSections to pairs of section symbols (first) and
+ // the mapping symbols (second) defined by the InputSection sorted in order
+ // of ascending value with redundant symbols removed. These describe the
+ // ranges of code and data in an executable InputSection.
+ llvm::DenseMap<InputSection *,
+ std::pair<Defined *, SmallVector<Defined *, 0>>>
+ sectionMap;
bool initialized = false;
};
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 34cca88ae63b0..55434ae2151ca 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -48,6 +48,11 @@ bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) {
if (off >= isec->getSize())
return true;
const uint8_t *buf = isec->content().begin();
+ // Synthetic sections may have a size but empty data - Assume that they won't
+ // contain a landing pad
+ if (buf == nullptr && isa<SyntheticSection>(isec))
+ return false;
+
const uint32_t instr = read32le(buf + off);
// All BTI instructions are HINT instructions which all have same encoding
// apart from bits [11:5]
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index d60216da2b03f..560fd8214720d 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1930,7 +1930,7 @@ ThunkSection *ThunkCreator::getISThunkSec(InputSection *isec) {
if (isec->outSecOff < first->outSecOff || last->outSecOff < isec->outSecOff)
continue;
- ts = addThunkSection(tos, isd, isec->outSecOff);
+ ts = addThunkSection(tos, isd, isec->outSecOff, /*isPrefix=*/true);
thunkedSections[isec] = ts;
return ts;
}
@@ -1989,11 +1989,11 @@ void ThunkCreator::createInitialThunkSections(
ThunkSection *ThunkCreator::addThunkSection(OutputSection *os,
InputSectionDescription *isd,
- uint64_t off) {
+ uint64_t off, bool isPrefix) {
auto *ts = make<ThunkSection>(ctx, os, off);
ts->partition = os->partition;
if ((ctx.arg.fixCortexA53Errata843419 || ctx.arg.fixCortexA8) &&
- !isd->sections.empty()) {
+ !isd->sections.empty() && !isPrefix) {
// The errata fixes are sensitive to addresses modulo 4 KiB. When we add
// thunks we disturb the base addresses of sections placed after the thunks
// this makes patches we have generated redundant, and may cause us to
@@ -2017,6 +2017,12 @@ ThunkSection *ThunkCreator::addThunkSection(OutputSection *os,
// 2.) The InputSectionDescription is larger than 4 KiB. This will prevent
// any assertion failures that an InputSectionDescription is < 4 KiB
// in size.
+ //
+ // isPrefix is a ThunkSection explicitly inserted before its target
+ // section. We suppress the rounding up of the size of these ThunkSections
+ // as unlike normal ThunkSections, they are small in size, but when BTI is
+ // enabled very frequent. This can bloat code-size and push the errata
+ // patches out of branch range.
uint64_t isdSize = isd->sections.back()->outSecOff +
isd->sections.back()->getSize() -
isd->sections.front()->outSecOff;
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 4cb09f329953a..0cba1fa11d234 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -204,7 +204,7 @@ class ThunkCreator {
std::pair<Thunk *, bool> getSyntheticLandingPad(Defined &d, int64_t a);
ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
- uint64_t off);
+ uint64_t off, bool isPrefix = false);
bool normalizeExistingThunk(Relocation &rel, uint64_t src);
diff --git a/lld/test/ELF/aarch64-cortex-a53-843419-thunk-relocation-crash.s b/lld/test/ELF/aarch64-cortex-a53-843419-thunk-relocation-crash.s
new file mode 100644
index 0000000000000..94477c3312d0c
--- /dev/null
+++ b/lld/test/ELF/aarch64-cortex-a53-843419-thunk-relocation-crash.s
@@ -0,0 +1,107 @@
+// REQUIRES: aarch64
+// RUN: rm -rf %t && split-file %s %t && cd %t
+// RUN: llvm-mc -mattr=+bti -filetype=obj -triple=aarch64 asm -o a.o
+// RUN: ld.lld --script lds -fix-cortex-a53-843419 -verbose a.o -o exe \
+// RUN: 2>&1 | FileCheck -check-prefix=CHECK-PRINT %s
+// RUN: llvm-objdump --no-print-imm-hex --no-show-raw-insn --triple=aarch64-linux-gnu -d exe | FileCheck %s
+
+/// Test case for specific crash wrt interaction between thunks and errata
+/// patches where the size of the added thunks meant that a range-extension
+/// thunk to the patch was required. We need to check that a BTI Thunk is
+/// generated for the patch, and that the patch's direct branch return is also
+/// range extended, possibly needing another BTI Thunk.
+///
+/// The asm below is based on a crash that was happening in Chromium.
+/// For more information see https://issues.chromium.org/issues/440019454
+
+//--- asm
+.section .note.gnu.property,"a"
+.p2align 3
+.long 4
+.long 0x10 // descriptor length
+.long 0x5 // GNU property type
+.asciz "GNU"
+.long 0xc0000000 // GNU_PROPERTY_AARCH64_FEATURE_1_AND
+.long 4
+.long 1 // GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+.long 0
+
+ .section .text.01, "ax", %progbits
+ .balign 4096
+ .globl _start
+ .type _start, %function
+_start:
+ bl far_away_no_bti
+
+ .section .text.far, "ax", %progbits
+ .globl far_away_no_bti
+ .type far_away, function
+far_away_no_bti:
+ .space 4096 - 28, 0
+ adrp x0, dat
+ ldr x1, [x1, #0]
+ ldr x0, [x0, :got_lo12:dat]
+ .space 0x8000000, 0
+ ret
+
+ .section .data
+ .globl dat
+dat: .quad 0
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 8010FFC in unpatched output.
+
+// CHECK: 0000000000010000 <_start>:
+// CHECK-NEXT: 10000: bl 0x10008 <__AArch64AbsLongThunk_far_away_no_bti>
+
+// CHECK: <__AArch64AbsLongThunk_far_away_no_bti>:
+// CHECK-NEXT: 10008: ldr x16, 0x10010
+// CHECK-NEXT: br x16
+// CHECK-NEXT: 10010: 18 00 01 08 .word 0x08010018
+
+// Check that the BTI thunks do NOT have their size rounded up to 4 KiB.
+// They precede the patch and they contain the landing pad.
+// CHECK: <__AArch64BTIThunk_far_away_no_bti>:
+// CHECK-NEXT: 8010018: bti c
+// CHECK-NEXT: b 0x8010038 <far_away_no_bti>
+
+// CHECK: <__AArch64AbsLongThunk___CortexA53843419_8011004>:
+// CHECK-NEXT: 8010020: ldr x16, 0x8010028
+// CHECK-NEXT: br x16
+// CHECK-NEXT: 8010028: 34 10 01 10 .word 0x10011034
+
+// CHECK: <__AArch64BTIThunk_>:
+// CHECK-NEXT: 8010030: bti c
+// CHECK-NEXT: b 0x8011028 <far_away_no_bti+0xff0>
+
+// CHECK: 8010038 <far_away_no_bti>:
+// CHECK-NEXT: ...
+// CHECK-NEXT: 801101c: adrp x0, 0x10012000
+// CHECK-NEXT: ldr x1, [x1]
+// CHECK-NEXT: b 0x8010020 <__AArch64AbsLongThunk___CortexA53843419_8011004>
+// CHECK-NEXT: ...
+// CHECK-NEXT: 10011028: ret
+
+// Check that the errata thunk does NOT contain a landing pad
+// CHECK: <__CortexA53843419_8011004>:
+// CHECK-NEXT: 1001102c: ldr x0, [x0, #64]
+// CHECK-NEXT: b 0x10011040 <__AArch64AbsLongThunk_>
+
+// Rest of generated code for readability
+// CHECK: <__AArch64BTIThunk___CortexA53843419_8011004>:
+// CHECK-NEXT: 10011034: bti c
+// CHECK-NEXT: b 0x1001102c <__CortexA53843419_8011004>
+
+// CHECK: <__AArch64AbsLongThunk_>
+// CHECK-NEXT: 10011040: ldr x16, 0x10011048
+// CHECK-NEXT: br x16
+// CHECK-NEXT: 10011048: 30 00 01 08 .word 0x08010030
+
+//--- lds
+SECTIONS {
+ .text 0x10000 : {
+ *(.text.01);
+ . += 0x8000000;
+ *(.text.far);
+ }
+}
+
More information about the llvm-commits
mailing list