[llvm] [BOLT] Add validation for direct call/branch targets, bypassing invalid functions (PR #165406)
Jinjie Huang via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 29 08:23:03 PDT 2025
https://github.com/Jinjie-Huang updated https://github.com/llvm/llvm-project/pull/165406
>From cc1d797f16958f0ebd53727f0668fd3482f70749 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Tue, 28 Oct 2025 22:17:14 +0800
Subject: [PATCH 1/3] validate direct branch target
---
bolt/include/bolt/Core/BinaryFunction.h | 7 +++++
bolt/lib/Core/BinaryFunction.cpp | 40 +++++++++++++++++++++++++
2 files changed, 47 insertions(+)
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index b215a1558cbb4..6fdc8336bf1b9 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2236,6 +2236,13 @@ class BinaryFunction {
/// it is probably another function.
bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;
+ /// Validates if the target of a direct branch/call is a valid
+ /// executable instruction.
+ /// Return true if the target is valid, false otherwise.
+ bool validateBranchTarget(uint64_t TargetAddress,
+ uint64_t AbsoluteInstrAddr,
+ const ArrayRef<uint8_t> &CurrentFunctionData);
+
/// Disassemble function from raw data.
/// If successful, this function will populate the list of instructions
/// for this function together with offsets from the function start
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 84023efe1084e..821a302d14ba1 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1283,6 +1283,41 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
return std::nullopt;
}
+bool BinaryFunction::validateBranchTarget(
+ uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
+ const ArrayRef<uint8_t> &CurrentFunctionData) {
+ if (auto *TargetFunc =
+ BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+ const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
+ ArrayRef<uint8_t> TargetFunctionData;
+ // Check if the target address is within the current function.
+ if (TargetFunc == this) {
+ TargetFunctionData = CurrentFunctionData;
+ } else {
+ // external call/branch, fetch the binary data for target
+ ErrorOr<ArrayRef<uint8_t>> TargetDataOrErr = TargetFunc->getData();
+ assert(TargetDataOrErr && "function data is not available");
+ TargetFunctionData = *TargetDataOrErr;
+ }
+
+ MCInst TargetInst;
+ uint64_t TargetInstSize;
+ if (!BC.SymbolicDisAsm->getInstruction(
+ TargetInst, TargetInstSize, TargetFunctionData.slice(TargetOffset),
+ TargetAddress, nulls())) {
+ // If the target address cannot be disassembled well,
+ // it implies a corrupted control flow.
+ BC.errs() << "BOLT-WARNING: direct branch/call at 0x"
+ << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
+ << *this << " targets an invalid instruction at 0x"
+ << Twine::utohexstr(TargetAddress) << "\n";
+ return false;
+ }
+ }
+
+ return true;
+}
+
Error BinaryFunction::disassemble() {
NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
"Build Binary Functions", opts::TimeBuild);
@@ -1396,6 +1431,11 @@ Error BinaryFunction::disassemble() {
uint64_t TargetAddress = 0;
if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
TargetAddress)) {
+ if (!validateBranchTarget(TargetAddress, AbsoluteInstrAddr,
+ FunctionData)) {
+ setIgnored();
+ break;
+ }
// Check if the target is within the same function. Otherwise it's
// a call, possibly a tail call.
//
>From df40e1a2f8aba0cb2c1a340c61e8fb08e5865380 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Wed, 29 Oct 2025 23:04:35 +0800
Subject: [PATCH 2/3] validate direct branch target
---
bolt/include/bolt/Core/BinaryFunction.h | 3 +--
bolt/lib/Core/BinaryFunction.cpp | 19 ++++++++++----
bolt/test/X86/validate-branch-target.s | 35 +++++++++++++++++++++++++
3 files changed, 50 insertions(+), 7 deletions(-)
create mode 100644 bolt/test/X86/validate-branch-target.s
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 6fdc8336bf1b9..c9ec1b4aefb3e 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2239,8 +2239,7 @@ class BinaryFunction {
/// Validates if the target of a direct branch/call is a valid
/// executable instruction.
/// Return true if the target is valid, false otherwise.
- bool validateBranchTarget(uint64_t TargetAddress,
- uint64_t AbsoluteInstrAddr,
+ bool validateBranchTarget(uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
const ArrayRef<uint8_t> &CurrentFunctionData);
/// Disassemble function from raw data.
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 821a302d14ba1..1d7c9bf37d090 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1286,8 +1286,7 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
bool BinaryFunction::validateBranchTarget(
uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
const ArrayRef<uint8_t> &CurrentFunctionData) {
- if (auto *TargetFunc =
- BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+ if (auto *TargetFunc = BC.getBinaryFunctionContainingAddress(TargetAddress)) {
const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
ArrayRef<uint8_t> TargetFunctionData;
// Check if the target address is within the current function.
@@ -1739,9 +1738,19 @@ bool BinaryFunction::scanExternalRefs() {
const uint64_t FunctionOffset =
TargetAddress - TargetFunction->getAddress();
- BranchTargetSymbol =
- FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
- : TargetFunction->getSymbol();
+ if (!TargetFunction->isInConstantIsland(TargetAddress)) {
+ BranchTargetSymbol =
+ FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
+ : TargetFunction->getSymbol();
+ } else {
+ TargetFunction->setIgnored();
+ Success = false;
+ BC.outs() << "BOLT-WARNING: Ignoring entry point at address 0x"
+ << Twine::utohexstr(Address)
+ << " in constant island of function " << *TargetFunction
+ << '\n';
+ break;
+ }
}
// Can't find more references. Not creating relocations since we are not
diff --git a/bolt/test/X86/validate-branch-target.s b/bolt/test/X86/validate-branch-target.s
new file mode 100644
index 0000000000000..56437681c238f
--- /dev/null
+++ b/bolt/test/X86/validate-branch-target.s
@@ -0,0 +1,35 @@
+## Test that BOLT errs when detecting the target
+## of a direct call/branch is a invalid instruction
+
+# REQUIRES: system-linux
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o main.o
+# RUN: %clang %cflags -pie -Wl,-q %t/main.o -o main.exe
+# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
+
+# CHECK-TARGETS: BOLT-WARNING: direct branch/call at 0x{{[0-9a-f]+}} in function RC4_options targets an invalid instruction at 0x{{[0-9a-f]+}}
+
+# a date-in-code function case from OPENSSL
+.globl RC4_options
+.type RC4_options, at function
+.align 16
+RC4_options:
+ leaq .Lopts(%rip),%rax
+ btl $20,%edx
+ jc .L8xchar
+ btl $30,%edx
+ jnc .Ldone
+ addq $25,%rax
+ .byte 0xf3,0xc3
+.L8xchar:
+ addq $12,%rax
+.Ldone:
+ .byte 0xf3,0xc3
+.align 64
+.Lopts:
+.byte 114,99,52,40,56,120,44,105,110,116,41,0 # data '114' will be disassembled as 'jb'
+.byte 114,99,52,40,56,120,44,99,104,97,114,41,0
+.byte 114,99,52,40,49,54,120,44,105,110,116,41,0
+.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
+.size RC4_options,.-RC4_options
>From 89783665b64f1664ca19746b89bc5d1e717ab7c8 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Wed, 29 Oct 2025 23:22:28 +0800
Subject: [PATCH 3/3] fix format
---
bolt/lib/Core/BinaryFunction.cpp | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 1d7c9bf37d090..13e0bf078273a 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1740,15 +1740,16 @@ bool BinaryFunction::scanExternalRefs() {
TargetAddress - TargetFunction->getAddress();
if (!TargetFunction->isInConstantIsland(TargetAddress)) {
BranchTargetSymbol =
- FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
- : TargetFunction->getSymbol();
+ FunctionOffset
+ ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
+ : TargetFunction->getSymbol();
} else {
TargetFunction->setIgnored();
Success = false;
BC.outs() << "BOLT-WARNING: Ignoring entry point at address 0x"
- << Twine::utohexstr(Address)
- << " in constant island of function " << *TargetFunction
- << '\n';
+ << Twine::utohexstr(Address)
+ << " in constant island of function " << *TargetFunction
+ << '\n';
break;
}
}
More information about the llvm-commits
mailing list