[llvm] [BOLT] Add validation for direct call/branch targets, bypassing invalid functions (PR #165406)

Jinjie Huang via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 29 08:23:03 PDT 2025


https://github.com/Jinjie-Huang updated https://github.com/llvm/llvm-project/pull/165406

>From cc1d797f16958f0ebd53727f0668fd3482f70749 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Tue, 28 Oct 2025 22:17:14 +0800
Subject: [PATCH 1/3] validate direct branch target

---
 bolt/include/bolt/Core/BinaryFunction.h |  7 +++++
 bolt/lib/Core/BinaryFunction.cpp        | 40 +++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index b215a1558cbb4..6fdc8336bf1b9 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2236,6 +2236,13 @@ class BinaryFunction {
   /// it is probably another function.
   bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;
 
+  /// Validates if the target of a direct branch/call is a valid
+  /// executable instruction.
+  /// Return true if the target is valid, false otherwise.
+  bool validateBranchTarget(uint64_t TargetAddress,
+                            uint64_t AbsoluteInstrAddr,
+                            const ArrayRef<uint8_t> &CurrentFunctionData);
+
   /// Disassemble function from raw data.
   /// If successful, this function will populate the list of instructions
   /// for this function together with offsets from the function start
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 84023efe1084e..821a302d14ba1 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1283,6 +1283,41 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
   return std::nullopt;
 }
 
+bool BinaryFunction::validateBranchTarget(
+    uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
+    const ArrayRef<uint8_t> &CurrentFunctionData) {
+  if (auto *TargetFunc =
+          BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+    const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
+    ArrayRef<uint8_t> TargetFunctionData;
+    // Check if the target address is within the current function.
+    if (TargetFunc == this) {
+      TargetFunctionData = CurrentFunctionData;
+    } else {
+      // external call/branch, fetch the binary data for target
+      ErrorOr<ArrayRef<uint8_t>> TargetDataOrErr = TargetFunc->getData();
+      assert(TargetDataOrErr && "function data is not available");
+      TargetFunctionData = *TargetDataOrErr;
+    }
+
+    MCInst TargetInst;
+    uint64_t TargetInstSize;
+    if (!BC.SymbolicDisAsm->getInstruction(
+            TargetInst, TargetInstSize, TargetFunctionData.slice(TargetOffset),
+            TargetAddress, nulls())) {
+      // If the target address cannot be disassembled well,
+      // it implies a corrupted control flow.
+      BC.errs() << "BOLT-WARNING: direct branch/call at 0x"
+                << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
+                << *this << " targets an invalid instruction at 0x"
+                << Twine::utohexstr(TargetAddress) << "\n";
+      return false;
+    }
+  }
+
+  return true;
+}
+
 Error BinaryFunction::disassemble() {
   NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                      "Build Binary Functions", opts::TimeBuild);
@@ -1396,6 +1431,11 @@ Error BinaryFunction::disassemble() {
       uint64_t TargetAddress = 0;
       if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                               TargetAddress)) {
+        if (!validateBranchTarget(TargetAddress, AbsoluteInstrAddr,
+                                  FunctionData)) {
+          setIgnored();
+          break;
+        }
         // Check if the target is within the same function. Otherwise it's
         // a call, possibly a tail call.
         //

>From df40e1a2f8aba0cb2c1a340c61e8fb08e5865380 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Wed, 29 Oct 2025 23:04:35 +0800
Subject: [PATCH 2/3] validate direct branch target

---
 bolt/include/bolt/Core/BinaryFunction.h |  3 +--
 bolt/lib/Core/BinaryFunction.cpp        | 19 ++++++++++----
 bolt/test/X86/validate-branch-target.s  | 35 +++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 7 deletions(-)
 create mode 100644 bolt/test/X86/validate-branch-target.s

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 6fdc8336bf1b9..c9ec1b4aefb3e 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2239,8 +2239,7 @@ class BinaryFunction {
   /// Validates if the target of a direct branch/call is a valid
   /// executable instruction.
   /// Return true if the target is valid, false otherwise.
-  bool validateBranchTarget(uint64_t TargetAddress,
-                            uint64_t AbsoluteInstrAddr,
+  bool validateBranchTarget(uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
                             const ArrayRef<uint8_t> &CurrentFunctionData);
 
   /// Disassemble function from raw data.
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 821a302d14ba1..1d7c9bf37d090 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1286,8 +1286,7 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
 bool BinaryFunction::validateBranchTarget(
     uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
     const ArrayRef<uint8_t> &CurrentFunctionData) {
-  if (auto *TargetFunc =
-          BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+  if (auto *TargetFunc = BC.getBinaryFunctionContainingAddress(TargetAddress)) {
     const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
     ArrayRef<uint8_t> TargetFunctionData;
     // Check if the target address is within the current function.
@@ -1739,9 +1738,19 @@ bool BinaryFunction::scanExternalRefs() {
 
       const uint64_t FunctionOffset =
           TargetAddress - TargetFunction->getAddress();
-      BranchTargetSymbol =
-          FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
-                         : TargetFunction->getSymbol();
+      if (!TargetFunction->isInConstantIsland(TargetAddress)) {
+        BranchTargetSymbol =
+            FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
+                          : TargetFunction->getSymbol();
+      } else {
+        TargetFunction->setIgnored();
+        Success = false;
+        BC.outs() << "BOLT-WARNING: Ignoring entry point at address 0x"
+                      << Twine::utohexstr(Address)
+                      << " in constant island of function " << *TargetFunction
+                      << '\n';
+        break;
+      }
     }
 
     // Can't find more references. Not creating relocations since we are not
diff --git a/bolt/test/X86/validate-branch-target.s b/bolt/test/X86/validate-branch-target.s
new file mode 100644
index 0000000000000..56437681c238f
--- /dev/null
+++ b/bolt/test/X86/validate-branch-target.s
@@ -0,0 +1,35 @@
+## Test that BOLT errs when detecting the target 
+## of a direct call/branch is a invalid instruction
+
+# REQUIRES: system-linux
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o main.o
+# RUN: %clang %cflags -pie -Wl,-q %t/main.o -o main.exe
+# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
+
+# CHECK-TARGETS: BOLT-WARNING: direct branch/call at 0x{{[0-9a-f]+}} in function RC4_options targets an invalid instruction at 0x{{[0-9a-f]+}}
+
+# a date-in-code function case from OPENSSL
+.globl	RC4_options
+.type	RC4_options, at function
+.align	16
+RC4_options:
+	leaq	.Lopts(%rip),%rax
+	btl	$20,%edx
+	jc	.L8xchar
+	btl	$30,%edx
+	jnc	.Ldone
+	addq	$25,%rax
+	.byte	0xf3,0xc3
+.L8xchar:
+	addq	$12,%rax
+.Ldone:
+	.byte	0xf3,0xc3
+.align	64
+.Lopts:
+.byte	114,99,52,40,56,120,44,105,110,116,41,0  # data '114' will be disassembled as 'jb'
+.byte	114,99,52,40,56,120,44,99,104,97,114,41,0
+.byte	114,99,52,40,49,54,120,44,105,110,116,41,0
+.byte	82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	64
+.size	RC4_options,.-RC4_options

>From 89783665b64f1664ca19746b89bc5d1e717ab7c8 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Wed, 29 Oct 2025 23:22:28 +0800
Subject: [PATCH 3/3] fix format

---
 bolt/lib/Core/BinaryFunction.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 1d7c9bf37d090..13e0bf078273a 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1740,15 +1740,16 @@ bool BinaryFunction::scanExternalRefs() {
           TargetAddress - TargetFunction->getAddress();
       if (!TargetFunction->isInConstantIsland(TargetAddress)) {
         BranchTargetSymbol =
-            FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
-                          : TargetFunction->getSymbol();
+            FunctionOffset
+                ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
+                : TargetFunction->getSymbol();
       } else {
         TargetFunction->setIgnored();
         Success = false;
         BC.outs() << "BOLT-WARNING: Ignoring entry point at address 0x"
-                      << Twine::utohexstr(Address)
-                      << " in constant island of function " << *TargetFunction
-                      << '\n';
+                  << Twine::utohexstr(Address)
+                  << " in constant island of function " << *TargetFunction
+                  << '\n';
         break;
       }
     }



More information about the llvm-commits mailing list