[llvm] [BOLT] Add validation for direct call/branch targets, bypassing invalid functions (PR #165406)
Jinjie Huang via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 30 20:05:44 PDT 2025
https://github.com/Jinjie-Huang updated https://github.com/llvm/llvm-project/pull/165406
>From ae643253fe8d1a3769eb1b112efa77ff5215d987 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Tue, 28 Oct 2025 22:17:14 +0800
Subject: [PATCH 1/2] validate direct branch target
---
bolt/include/bolt/Core/BinaryFunction.h | 7 +++++
bolt/lib/Core/BinaryFunction.cpp | 40 +++++++++++++++++++++++++
2 files changed, 47 insertions(+)
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index b215a1558cbb4..6fdc8336bf1b9 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2236,6 +2236,13 @@ class BinaryFunction {
/// it is probably another function.
bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;
+ /// Validates if the target of a direct branch/call is a valid
+ /// executable instruction.
+ /// Return true if the target is valid, false otherwise.
+ bool validateBranchTarget(uint64_t TargetAddress,
+ uint64_t AbsoluteInstrAddr,
+ const ArrayRef<uint8_t> &CurrentFunctionData);
+
/// Disassemble function from raw data.
/// If successful, this function will populate the list of instructions
/// for this function together with offsets from the function start
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index fbe186454351c..9f1d1e905ce87 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1283,6 +1283,41 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
return std::nullopt;
}
+bool BinaryFunction::validateBranchTarget(
+ uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
+ const ArrayRef<uint8_t> &CurrentFunctionData) {
+ if (auto *TargetFunc =
+ BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+ const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
+ ArrayRef<uint8_t> TargetFunctionData;
+ // Check if the target address is within the current function.
+ if (TargetFunc == this) {
+ TargetFunctionData = CurrentFunctionData;
+ } else {
+ // The target lies in another function: fetch the raw bytes of that function.
+ ErrorOr<ArrayRef<uint8_t>> TargetDataOrErr = TargetFunc->getData();
+ assert(TargetDataOrErr && "function data is not available");
+ TargetFunctionData = *TargetDataOrErr;
+ }
+
+ MCInst TargetInst;
+ uint64_t TargetInstSize;
+ if (!BC.SymbolicDisAsm->getInstruction(
+ TargetInst, TargetInstSize, TargetFunctionData.slice(TargetOffset),
+ TargetAddress, nulls())) {
+ // The bytes at the target address do not decode as an instruction,
+ // which implies a corrupted control flow; warn and report it as invalid.
+ BC.errs() << "BOLT-WARNING: direct branch/call at 0x"
+ << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
+ << *this << " targets an invalid instruction at 0x"
+ << Twine::utohexstr(TargetAddress) << "\n";
+ return false;
+ }
+ }
+
+ return true;
+}
+
Error BinaryFunction::disassemble() {
NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
"Build Binary Functions", opts::TimeBuild);
@@ -1396,6 +1431,11 @@ Error BinaryFunction::disassemble() {
uint64_t TargetAddress = 0;
if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
TargetAddress)) {
+ if (!validateBranchTarget(TargetAddress, AbsoluteInstrAddr,
+ FunctionData)) {
+ setIgnored();
+ break;
+ }
// Check if the target is within the same function. Otherwise it's
// a call, possibly a tail call.
//
>From 25b21b99ee0964ce798820735b5d518430fb229e Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Wed, 29 Oct 2025 23:04:35 +0800
Subject: [PATCH 2/2] validate direct branch target
---
bolt/include/bolt/Core/BinaryFunction.h | 3 +--
bolt/lib/Core/BinaryFunction.cpp | 3 +--
bolt/test/X86/validate-branch-target.s | 35 +++++++++++++++++++++++++
3 files changed, 37 insertions(+), 4 deletions(-)
create mode 100644 bolt/test/X86/validate-branch-target.s
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 6fdc8336bf1b9..c9ec1b4aefb3e 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2239,8 +2239,7 @@ class BinaryFunction {
/// Validates if the target of a direct branch/call is a valid
/// executable instruction.
/// Return true if the target is valid, false otherwise.
- bool validateBranchTarget(uint64_t TargetAddress,
- uint64_t AbsoluteInstrAddr,
+ bool validateBranchTarget(uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
const ArrayRef<uint8_t> &CurrentFunctionData);
/// Disassemble function from raw data.
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 9f1d1e905ce87..0a638ec58d000 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1286,8 +1286,7 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
bool BinaryFunction::validateBranchTarget(
uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
const ArrayRef<uint8_t> &CurrentFunctionData) {
- if (auto *TargetFunc =
- BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+ if (auto *TargetFunc = BC.getBinaryFunctionContainingAddress(TargetAddress)) {
const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
ArrayRef<uint8_t> TargetFunctionData;
// Check if the target address is within the current function.
diff --git a/bolt/test/X86/validate-branch-target.s b/bolt/test/X86/validate-branch-target.s
new file mode 100644
index 0000000000000..56437681c238f
--- /dev/null
+++ b/bolt/test/X86/validate-branch-target.s
@@ -0,0 +1,35 @@
+## Test that BOLT warns when it detects that the target
+## of a direct call/branch is an invalid instruction.
+
+# REQUIRES: system-linux
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o main.o
+# RUN: %clang %cflags -pie -Wl,-q %t/main.o -o main.exe
+# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
+
+# CHECK-TARGETS: BOLT-WARNING: direct branch/call at 0x{{[0-9a-f]+}} in function RC4_options targets an invalid instruction at 0x{{[0-9a-f]+}}
+
+# A data-in-code function case from OpenSSL.
+.globl RC4_options
+.type RC4_options,@function
+.align 16
+RC4_options:
+ leaq .Lopts(%rip),%rax
+ btl $20,%edx
+ jc .L8xchar
+ btl $30,%edx
+ jnc .Ldone
+ addq $25,%rax
+ .byte 0xf3,0xc3
+.L8xchar:
+ addq $12,%rax
+.Ldone:
+ .byte 0xf3,0xc3
+.align 64
+.Lopts:
+.byte 114,99,52,40,56,120,44,105,110,116,41,0 # data '114' will be disassembled as 'jb'
+.byte 114,99,52,40,56,120,44,99,104,97,114,41,0
+.byte 114,99,52,40,49,54,120,44,105,110,116,41,0
+.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
+.size RC4_options,.-RC4_options
More information about the llvm-commits
mailing list