[llvm] [BOLT] Add validation for direct call/branch targets, bypassing invalid functions (PR #165406)

Jinjie Huang via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 30 20:05:44 PDT 2025


https://github.com/Jinjie-Huang updated https://github.com/llvm/llvm-project/pull/165406

>From ae643253fe8d1a3769eb1b112efa77ff5215d987 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Tue, 28 Oct 2025 22:17:14 +0800
Subject: [PATCH 1/2] validate direct branch target

---
 bolt/include/bolt/Core/BinaryFunction.h |  7 +++++
 bolt/lib/Core/BinaryFunction.cpp        | 40 +++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index b215a1558cbb4..6fdc8336bf1b9 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2236,6 +2236,13 @@ class BinaryFunction {
   /// it is probably another function.
   bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;
 
+  /// Validates if the target of a direct branch/call is a valid
+  /// executable instruction.
+  /// Return true if the target is valid, false otherwise.
+  bool validateBranchTarget(uint64_t TargetAddress,
+                            uint64_t AbsoluteInstrAddr,
+                            const ArrayRef<uint8_t> &CurrentFunctionData);
+
   /// Disassemble function from raw data.
   /// If successful, this function will populate the list of instructions
   /// for this function together with offsets from the function start
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index fbe186454351c..9f1d1e905ce87 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1283,6 +1283,41 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
   return std::nullopt;
 }
 
+bool BinaryFunction::validateBranchTarget(
+    uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
+    const ArrayRef<uint8_t> &CurrentFunctionData) {
+  if (auto *TargetFunc =
+          BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+    const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
+    ArrayRef<uint8_t> TargetFunctionData;
+    // Check if the target address is within the current function.
+    if (TargetFunc == this) {
+      TargetFunctionData = CurrentFunctionData;
+    } else {
+      // Target is in another function: fetch that function's raw bytes.
+      ErrorOr<ArrayRef<uint8_t>> TargetDataOrErr = TargetFunc->getData();
+      assert(TargetDataOrErr && "function data is not available");
+      TargetFunctionData = *TargetDataOrErr;
+    }
+
+    MCInst TargetInst;
+    uint64_t TargetInstSize;
+    if (!BC.SymbolicDisAsm->getInstruction(
+            TargetInst, TargetInstSize, TargetFunctionData.slice(TargetOffset),
+            TargetAddress, nulls())) {
+      // No instruction decodes at the target address, so treat the control
+      // flow as corrupted: warn and report the target as invalid.
+      BC.errs() << "BOLT-WARNING: direct branch/call at 0x"
+                << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
+                << *this << " targets an invalid instruction at 0x"
+                << Twine::utohexstr(TargetAddress) << "\n";
+      return false;
+    }
+  }
+
+  return true;
+}
+
 Error BinaryFunction::disassemble() {
   NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                      "Build Binary Functions", opts::TimeBuild);
@@ -1396,6 +1431,11 @@ Error BinaryFunction::disassemble() {
       uint64_t TargetAddress = 0;
       if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                               TargetAddress)) {
+        if (!validateBranchTarget(TargetAddress, AbsoluteInstrAddr,
+                                  FunctionData)) {
+          setIgnored();
+          break;
+        }
         // Check if the target is within the same function. Otherwise it's
         // a call, possibly a tail call.
         //

>From 25b21b99ee0964ce798820735b5d518430fb229e Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Wed, 29 Oct 2025 23:04:35 +0800
Subject: [PATCH 2/2] validate direct branch target

---
 bolt/include/bolt/Core/BinaryFunction.h |  3 +--
 bolt/lib/Core/BinaryFunction.cpp        |  3 +--
 bolt/test/X86/validate-branch-target.s  | 35 +++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 4 deletions(-)
 create mode 100644 bolt/test/X86/validate-branch-target.s

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 6fdc8336bf1b9..c9ec1b4aefb3e 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2239,8 +2239,7 @@ class BinaryFunction {
   /// Validates if the target of a direct branch/call is a valid
   /// executable instruction.
   /// Return true if the target is valid, false otherwise.
-  bool validateBranchTarget(uint64_t TargetAddress,
-                            uint64_t AbsoluteInstrAddr,
+  bool validateBranchTarget(uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
                             const ArrayRef<uint8_t> &CurrentFunctionData);
 
   /// Disassemble function from raw data.
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 9f1d1e905ce87..0a638ec58d000 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1286,8 +1286,7 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
 bool BinaryFunction::validateBranchTarget(
     uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
     const ArrayRef<uint8_t> &CurrentFunctionData) {
-  if (auto *TargetFunc =
-          BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+  if (auto *TargetFunc = BC.getBinaryFunctionContainingAddress(TargetAddress)) {
     const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
     ArrayRef<uint8_t> TargetFunctionData;
     // Check if the target address is within the current function.
diff --git a/bolt/test/X86/validate-branch-target.s b/bolt/test/X86/validate-branch-target.s
new file mode 100644
index 0000000000000..56437681c238f
--- /dev/null
+++ b/bolt/test/X86/validate-branch-target.s
@@ -0,0 +1,35 @@
+## Test that BOLT warns when it detects that the target
+## of a direct call/branch is an invalid instruction.
+
+# REQUIRES: system-linux
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o main.o
+# RUN: %clang %cflags -pie -Wl,-q %t/main.o -o main.exe
+# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
+
+# CHECK-TARGETS: BOLT-WARNING: direct branch/call at 0x{{[0-9a-f]+}} in function RC4_options targets an invalid instruction at 0x{{[0-9a-f]+}}
+
+# A data-in-code function case from OpenSSL.
+.globl	RC4_options
+.type	RC4_options,@function
+.align	16
+RC4_options:
+	leaq	.Lopts(%rip),%rax
+	btl	$20,%edx
+	jc	.L8xchar
+	btl	$30,%edx
+	jnc	.Ldone
+	addq	$25,%rax
+	.byte	0xf3,0xc3
+.L8xchar:
+	addq	$12,%rax
+.Ldone:
+	.byte	0xf3,0xc3
+.align	64
+.Lopts:
+.byte	114,99,52,40,56,120,44,105,110,116,41,0  # data '114' will be disassembled as 'jb'
+.byte	114,99,52,40,56,120,44,99,104,97,114,41,0
+.byte	114,99,52,40,49,54,120,44,105,110,116,41,0
+.byte	82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	64
+.size	RC4_options,.-RC4_options



More information about the llvm-commits mailing list