[llvm] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize binary (PR #171149)
Gergely Bálint via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 19 08:32:46 PST 2025
https://github.com/bgergely0 updated https://github.com/llvm/llvm-project/pull/171149
>From 8922788c7d30d7b6edaead6c5239b621bdf03870 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Wed, 3 Sep 2025 13:29:39 +0000
Subject: [PATCH 1/4] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to
optimize binary
This patch adds BTI landing pads to ShortJmp/LongJmp targets in the
LongJmp pass when optimizing BTI binaries.
BOLT does not have the ability to add BTI to all types of functions.
This patch inserts the landing pad where possible, and emits an
error where insertion is not currently possible.
BOLT cannot insert BTIs into several function "types", including:
- ignored functions,
- PLT functions,
- other functions without a CFG.
Additional context:
In #161206, BOLT gained the ability to decode the .note.gnu.property
section and to warn about the lack of BTI support in BOLT. However, this
warning is misleading: the emitted binary may not need extra BTI landing
pads.
With this patch, the emitted binary will be "BTI-safe".
---
bolt/include/bolt/Core/BinaryBasicBlock.h | 2 +
bolt/lib/Passes/LongJmp.cpp | 53 +++++++++++++++++++++--
bolt/lib/Rewrite/GNUPropertyRewriter.cpp | 3 +-
bolt/test/AArch64/bti-note.test | 4 +-
bolt/test/AArch64/long-jmp-bti-ignored.s | 35 +++++++++++++++
bolt/test/AArch64/long-jmp-bti.s | 46 ++++++++++++++++++++
bolt/test/AArch64/no-bti-note.test | 4 +-
7 files changed, 138 insertions(+), 9 deletions(-)
create mode 100644 bolt/test/AArch64/long-jmp-bti-ignored.s
create mode 100644 bolt/test/AArch64/long-jmp-bti.s
diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h
index 629f0ce8314dc..2be30c14bf90b 100644
--- a/bolt/include/bolt/Core/BinaryBasicBlock.h
+++ b/bolt/include/bolt/Core/BinaryBasicBlock.h
@@ -890,6 +890,8 @@ class BinaryBasicBlock {
/// Needed by graph traits.
BinaryFunction *getParent() const { return getFunction(); }
+ bool hasParent() const { return getFunction() != nullptr; }
+
/// Return true if the containing function is in CFG state.
bool hasCFG() const;
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index a9f81a0480138..1295f0c25c399 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -470,8 +470,8 @@ uint64_t LongJmpPass::getSymbolAddress(const BinaryContext &BC,
}
Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
- const BinaryFunction &Func = *StubBB.getFunction();
- const BinaryContext &BC = Func.getBinaryContext();
+ BinaryFunction &Func = *StubBB.getFunction();
+ BinaryContext &BC = Func.getBinaryContext();
const int Bits = StubBits[&StubBB];
// Already working with the largest range?
if (Bits == static_cast<int>(BC.AsmInfo->getCodePointerSize() * 8))
@@ -484,11 +484,54 @@ Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
~((1ULL << (RangeSingleInstr - 1)) - 1);
const MCSymbol *RealTargetSym = BC.MIB->getTargetSymbol(*StubBB.begin());
- const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(RealTargetSym);
+ BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(RealTargetSym);
+ BinaryFunction *TargetFunction = BC.getFunctionForSymbol(RealTargetSym);
uint64_t TgtAddress = getSymbolAddress(BC, RealTargetSym, TgtBB);
uint64_t DotAddress = BBAddresses[&StubBB];
uint64_t PCRelTgtAddress = DotAddress > TgtAddress ? DotAddress - TgtAddress
: TgtAddress - DotAddress;
+
+ auto applyBTIFixup = [&](BinaryFunction *TargetFunction,
+ BinaryBasicBlock *RealTgtBB) {
+ // TODO: add support for editing each type, and remove errors.
+ if (!TargetFunction && !RealTgtBB) {
+ BC.errs() << "BOLT-ERROR: Cannot add BTI to function with symbol "
+ << RealTargetSym->getName() << "\n";
+ exit(1);
+ }
+ if (TargetFunction && TargetFunction->isIgnored()) {
+ BC.errs() << "BOLT-ERROR: Cannot add BTI landing pad to ignored function "
+ << TargetFunction->getPrintName() << "\n";
+ exit(1);
+ }
+ if (TargetFunction && !TargetFunction->hasCFG()) {
+ auto FirstII = TargetFunction->instrs().begin();
+ MCInst FirstInst = FirstII->second;
+ if (BC.MIB->isBTIVariantCoveringCall(FirstInst,
+ *StubBB.getLastNonPseudoInstr()))
+ return;
+ BC.errs()
+ << "BOLT-ERROR: Cannot add BTI landing pad to function without CFG: "
+ << TargetFunction->getPrintName() << "\n";
+ exit(1);
+ }
+ if (!RealTgtBB)
+ // !RealTgtBB -> TargetFunction is not a nullptr
+ RealTgtBB = &*TargetFunction->begin();
+ if (RealTgtBB) {
+ if (!RealTgtBB->hasParent()) {
+ BC.errs() << "BOLT-ERROR: Cannot add BTI to block with no parent "
+ "function. Targeted symbol: "
+ << RealTargetSym->getName() << "\n";
+ exit(1);
+ }
+ // The BR is the last inst of the StubBB.
+ BC.MIB->insertBTI(*RealTgtBB, *StubBB.getLastNonPseudoInstr());
+ return;
+ }
+ BC.errs() << "BOLT-ERROR: unhandled case when applying BTI fixup\n";
+ exit(1);
+ };
// If it fits in one instruction, do not relax
if (!(PCRelTgtAddress & SingleInstrMask))
return Error::success();
@@ -503,6 +546,8 @@ Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
<< " RealTargetSym = " << RealTargetSym->getName()
<< "\n");
relaxStubToShortJmp(StubBB, RealTargetSym);
+ if (BC.usesBTI())
+ applyBTIFixup(TargetFunction, TgtBB);
StubBits[&StubBB] = RangeShortJmp;
Modified = true;
return Error::success();
@@ -518,6 +563,8 @@ Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
<< Twine::utohexstr(PCRelTgtAddress)
<< " RealTargetSym = " << RealTargetSym->getName() << "\n");
relaxStubToLongJmp(StubBB, RealTargetSym);
+ if (BC.usesBTI())
+ applyBTIFixup(TargetFunction, TgtBB);
StubBits[&StubBB] = static_cast<int>(BC.AsmInfo->getCodePointerSize() * 8);
Modified = true;
return Error::success();
diff --git a/bolt/lib/Rewrite/GNUPropertyRewriter.cpp b/bolt/lib/Rewrite/GNUPropertyRewriter.cpp
index f61c08ec46fe6..cdf4e6dc453f4 100644
--- a/bolt/lib/Rewrite/GNUPropertyRewriter.cpp
+++ b/bolt/lib/Rewrite/GNUPropertyRewriter.cpp
@@ -75,8 +75,7 @@ Error GNUPropertyRewriter::sectionInitializer() {
if (BC.isAArch64()) {
BC.setUsesBTI(FeaturesAcc & llvm::ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
if (BC.usesBTI())
- BC.outs() << "BOLT-WARNING: binary is using BTI. Optimized binary may be "
- "corrupted\n";
+ BC.outs() << "BOLT-INFO: binary is using BTI.\n";
}
return Error::success();
diff --git a/bolt/test/AArch64/bti-note.test b/bolt/test/AArch64/bti-note.test
index 1ec9d774b3271..8cd061882b4d4 100644
--- a/bolt/test/AArch64/bti-note.test
+++ b/bolt/test/AArch64/bti-note.test
@@ -1,5 +1,5 @@
// This test checks that the GNUPropertyRewriter can decode the BTI feature flag.
-// It decodes an executable with BTI, and checks for the warning.
+// It decodes an executable with BTI, and checks for the message.
RUN: yaml2obj %p/Inputs/property-note-bti.yaml &> %t.exe
@@ -7,4 +7,4 @@ RUN: llvm-readelf -n %t.exe | FileCheck %s
CHECK: BTI
RUN: llvm-bolt %t.exe -o %t.exe.bolt | FileCheck %s -check-prefix=CHECK-BOLT
-CHECK-BOLT: BOLT-WARNING: binary is using BTI. Optimized binary may be corrupted
+CHECK-BOLT: BOLT-INFO: binary is using BTI.
diff --git a/bolt/test/AArch64/long-jmp-bti-ignored.s b/bolt/test/AArch64/long-jmp-bti-ignored.s
new file mode 100644
index 0000000000000..e94c29b45e2ee
--- /dev/null
+++ b/bolt/test/AArch64/long-jmp-bti-ignored.s
@@ -0,0 +1,35 @@
+# This test checks the situation where LongJmp adds a stub targeting an ignored (skipped) function.
+# The problem is that by default BOLT cannot modify ignored functions, so it cannot add the needed BTI.
+
+# Current behaviour is to emit an error.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: -mattr=+bti -aarch64-mark-bti-property %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q -Wl,-z,force-bti
+# RUN: not llvm-bolt %t.exe -o %t.bolt \
+# RUN: --align-text=0x10000000 --skip-funcs=far_away_func 2>&1 | FileCheck %s
+
+# CHECK: BOLT-ERROR: Cannot add BTI landing pad to ignored function far_away_func
+
+ .section .text
+ .global _start
+ .global far_away_func
+
+ .align 4
+ .global _start
+ .type _start, %function
+_start:
+ bti c
+ bl far_away_func
+ ret
+
+ .global far_away_func
+ .type far_away_func, %function
+far_away_func:
+ add x0, x0, #1
+ ret
+
+.reloc 0, R_AARCH64_NONE
+
diff --git a/bolt/test/AArch64/long-jmp-bti.s b/bolt/test/AArch64/long-jmp-bti.s
new file mode 100644
index 0000000000000..90350ab9cf14a
--- /dev/null
+++ b/bolt/test/AArch64/long-jmp-bti.s
@@ -0,0 +1,46 @@
+# This test checks that BOLT can generate BTI landing pads for targets of stubs inserted in LongJmp.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %s %cflags -Wl,-q -o %t -mbranch-protection=bti -Wl,-z,force-bti
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions \
+# RUN: --print-split --print-only foo --print-longjmp 2>&1 | FileCheck %s
+
+#CHECK: BOLT-INFO: Starting stub-insertion pass
+#CHECK: Binary Function "foo" after long-jmp
+
+#CHECK: cmp x0, #0x0
+#CHECK-NEXT: Successors: .LStub0
+
+#CHECK: adrp x16, .Ltmp0
+#CHECK-NEXT: add x16, x16, :lo12:.Ltmp0
+#CHECK-NEXT: br x16 # UNKNOWN CONTROL FLOW
+
+#CHECK: ------- HOT-COLD SPLIT POINT -------
+
+#CHECK: bti c
+#CHECK-NEXT: mov x0, #0x2
+#CHECK-NEXT: ret
+
+ .text
+ .globl foo
+ .type foo, %function
+foo:
+.cfi_startproc
+.entry_bb:
+# FDATA: 1 foo #.entry_bb# 10
+ cmp x0, #0
+ b .Lcold_bb1
+.Lcold_bb1:
+ mov x0, #2
+ ret
+.cfi_endproc
+ .size foo, .-foo
+
+# empty space, so the splitting needs short stubs
+.data
+.space 0x8000000
+
+## Force relocation mode.
+.reloc 0, R_AARCH64_NONE
diff --git a/bolt/test/AArch64/no-bti-note.test b/bolt/test/AArch64/no-bti-note.test
index 28cce345deaab..d0ee5a62d2ead 100644
--- a/bolt/test/AArch64/no-bti-note.test
+++ b/bolt/test/AArch64/no-bti-note.test
@@ -1,5 +1,5 @@
// This test checks that the GNUPropertyRewriter can decode the BTI feature flag.
-// It decodes an executable without BTI, and checks for the warning.
+// It decodes an executable without BTI, and checks that the BTI message is not emitted.
RUN: yaml2obj %p/Inputs/property-note-nobti.yaml &> %t.exe
@@ -7,4 +7,4 @@ RUN: llvm-readelf -n %t.exe | FileCheck %s
CHECK-NOT: BTI
RUN: llvm-bolt %t.exe -o %t.exe.bolt | FileCheck %s -check-prefix=CHECK-BOLT
-CHECK-BOLT-NOT: BOLT-WARNING: binary is using BTI. Optimized binary may be corrupted
+CHECK-BOLT-NOT: BOLT-INFO: binary is using BTI.
>From 740f0251f48ea047519d51d631aaca106d3ae915 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Tue, 9 Dec 2025 11:57:48 +0000
Subject: [PATCH 2/4] [BOLT] Fix param order
---
bolt/lib/Passes/LongJmp.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 1295f0c25c399..3dbb5011c9bcc 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -507,8 +507,8 @@ Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
if (TargetFunction && !TargetFunction->hasCFG()) {
auto FirstII = TargetFunction->instrs().begin();
MCInst FirstInst = FirstII->second;
- if (BC.MIB->isBTIVariantCoveringCall(FirstInst,
- *StubBB.getLastNonPseudoInstr()))
+ if (BC.MIB->isCallCoveredByBTI(*StubBB.getLastNonPseudoInstr(),
+ FirstInst))
return;
BC.errs()
<< "BOLT-ERROR: Cannot add BTI landing pad to function without CFG: "
>From 6e18e8c348efab0c9cc18164c7ae3dec04341973 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Fri, 19 Dec 2025 16:25:41 +0000
Subject: [PATCH 3/4] [BOLT] Updates
- fix format
- add comments
---
bolt/include/bolt/Core/BinaryBasicBlock.h | 2 +-
bolt/test/AArch64/long-jmp-bti-ignored.s | 3 +++
bolt/test/AArch64/long-jmp-bti.s | 22 +++++++++++-----------
3 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h
index 2be30c14bf90b..c0f82e0f1842a 100644
--- a/bolt/include/bolt/Core/BinaryBasicBlock.h
+++ b/bolt/include/bolt/Core/BinaryBasicBlock.h
@@ -890,7 +890,7 @@ class BinaryBasicBlock {
/// Needed by graph traits.
BinaryFunction *getParent() const { return getFunction(); }
- bool hasParent() const { return getFunction() != nullptr; }
+ bool hasParent() const { return getParent() != nullptr; }
/// Return true if the containing function is in CFG state.
bool hasCFG() const;
diff --git a/bolt/test/AArch64/long-jmp-bti-ignored.s b/bolt/test/AArch64/long-jmp-bti-ignored.s
index e94c29b45e2ee..da6ad3bb7a333 100644
--- a/bolt/test/AArch64/long-jmp-bti-ignored.s
+++ b/bolt/test/AArch64/long-jmp-bti-ignored.s
@@ -25,6 +25,9 @@ _start:
bl far_away_func
ret
+# This is skipped, so it stays in the .bolt.org.text.
+# The .text produced by BOLT is aligned to 0x10000000,
+# so _start will need a stub to jump here.
.global far_away_func
.type far_away_func, %function
far_away_func:
diff --git a/bolt/test/AArch64/long-jmp-bti.s b/bolt/test/AArch64/long-jmp-bti.s
index 90350ab9cf14a..d3521f5aad591 100644
--- a/bolt/test/AArch64/long-jmp-bti.s
+++ b/bolt/test/AArch64/long-jmp-bti.s
@@ -7,21 +7,21 @@
# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions \
# RUN: --print-split --print-only foo --print-longjmp 2>&1 | FileCheck %s
-#CHECK: BOLT-INFO: Starting stub-insertion pass
-#CHECK: Binary Function "foo" after long-jmp
+# CHECK: BOLT-INFO: Starting stub-insertion pass
+# CHECK: Binary Function "foo" after long-jmp
-#CHECK: cmp x0, #0x0
-#CHECK-NEXT: Successors: .LStub0
+# CHECK: cmp x0, #0x0
+# CHECK-NEXT: Successors: .LStub0
-#CHECK: adrp x16, .Ltmp0
-#CHECK-NEXT: add x16, x16, :lo12:.Ltmp0
-#CHECK-NEXT: br x16 # UNKNOWN CONTROL FLOW
+# CHECK: adrp x16, .Ltmp0
+# CHECK-NEXT: add x16, x16, :lo12:.Ltmp0
+# CHECK-NEXT: br x16 # UNKNOWN CONTROL FLOW
-#CHECK: ------- HOT-COLD SPLIT POINT -------
+# CHECK: ------- HOT-COLD SPLIT POINT -------
-#CHECK: bti c
-#CHECK-NEXT: mov x0, #0x2
-#CHECK-NEXT: ret
+# CHECK: bti c
+# CHECK-NEXT: mov x0, #0x2
+# CHECK-NEXT: ret
.text
.globl foo
>From 3bfa390c90ce67043dcbbd6f3124c0c27b8c300f Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Fri, 19 Dec 2025 16:26:29 +0000
Subject: [PATCH 4/4] [BOLT] Add test for PLT entries
---
bolt/lib/Passes/LongJmp.cpp | 13 ++++++----
bolt/test/AArch64/long-jmp-bti-plt.c | 37 ++++++++++++++++++++++++++++
2 files changed, 45 insertions(+), 5 deletions(-)
create mode 100644 bolt/test/AArch64/long-jmp-bti-plt.c
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 3dbb5011c9bcc..9fedf919dd489 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -500,16 +500,19 @@ Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
exit(1);
}
if (TargetFunction && TargetFunction->isIgnored()) {
+ // Includes PLT functions.
BC.errs() << "BOLT-ERROR: Cannot add BTI landing pad to ignored function "
<< TargetFunction->getPrintName() << "\n";
exit(1);
}
if (TargetFunction && !TargetFunction->hasCFG()) {
- auto FirstII = TargetFunction->instrs().begin();
- MCInst FirstInst = FirstII->second;
- if (BC.MIB->isCallCoveredByBTI(*StubBB.getLastNonPseudoInstr(),
- FirstInst))
- return;
+ if (TargetFunction->hasInstructions()) {
+ auto FirstII = TargetFunction->instrs().begin();
+ MCInst FirstInst = FirstII->second;
+ if (BC.MIB->isCallCoveredByBTI(*StubBB.getLastNonPseudoInstr(),
+ FirstInst))
+ return;
+ }
BC.errs()
<< "BOLT-ERROR: Cannot add BTI landing pad to function without CFG: "
<< TargetFunction->getPrintName() << "\n";
diff --git a/bolt/test/AArch64/long-jmp-bti-plt.c b/bolt/test/AArch64/long-jmp-bti-plt.c
new file mode 100644
index 0000000000000..9c307886a4786
--- /dev/null
+++ b/bolt/test/AArch64/long-jmp-bti-plt.c
@@ -0,0 +1,37 @@
+/*
+
+// REQUIRES: system-linux
+
+// RUN: split-file %s %t
+
+// RUN: %clang -mbranch-protection=standard \
+// RUN: -no-pie %t/bti-plt.c -o %t.exe -Wl,-q -fuse-ld=lld \
+// RUN: -Wl,-T,%t/link.ld -Wl,-z,force-bti
+// RUN: not llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
+// CHECK: BOLT-INFO: binary is using BTI.
+// CHECK: BOLT-ERROR: Cannot add BTI landing pad to ignored function abort at PLT
+
+#--- link.ld
+
+SECTIONS {
+ .plt : { *(.plt .plt.*) }
+} INSERT BEFORE .text;
+
+#--- bti-plt.c
+
+//*/
+
+#include <stdio.h>
+
+int main(void) {
+ printf("Hello, World!\n");
+ return 0;
+}
+
+// .data big enough so the new code placed after it has to use stubs to reach
+// PLTs:
+__asm__(".section .data\n"
+ ".globl space\n"
+ "space:\n"
+ ".fill 0x8000000,1,0x0\n"
+ ".text\n");
More information about the llvm-commits
mailing list