[llvm] [BOLT] Add validation for direct call/branch targets, bypassing invalid functions (PR #165406)

Jinjie Huang via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 1 05:47:22 PST 2025


https://github.com/Jinjie-Huang updated https://github.com/llvm/llvm-project/pull/165406

>From 6727406699f96b56973c7794e99c7ce5baf035b5 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Tue, 28 Oct 2025 22:17:14 +0800
Subject: [PATCH 1/4] validate direct branch target

---
 bolt/include/bolt/Core/BinaryFunction.h |  7 +++++
 bolt/lib/Core/BinaryFunction.cpp        | 40 +++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index a720c6af216d7..b41c35c86bb00 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2264,6 +2264,13 @@ class BinaryFunction {
   /// it is probably another function.
   bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;
 
+  /// Validates if the target of a direct branch/call is a valid
+  /// executable instruction.
+  /// Return true if the target is valid, false otherwise.
+  bool validateBranchTarget(uint64_t TargetAddress,
+                            uint64_t AbsoluteInstrAddr,
+                            const ArrayRef<uint8_t> &CurrentFunctionData);
+
   /// Disassemble function from raw data.
   /// If successful, this function will populate the list of instructions
   /// for this function together with offsets from the function start
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index a0d8385aa3824..a835af0d1d5a6 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1283,6 +1283,41 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
   return std::nullopt;
 }
 
+bool BinaryFunction::validateBranchTarget(
+    uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
+    const ArrayRef<uint8_t> &CurrentFunctionData) {
+  if (auto *TargetFunc =
+          BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+    const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
+    ArrayRef<uint8_t> TargetFunctionData;
+    // Check if the target address is within the current function.
+    if (TargetFunc == this) {
+      TargetFunctionData = CurrentFunctionData;
+    } else {
+      // external call/branch, fetch the binary data for target
+      ErrorOr<ArrayRef<uint8_t>> TargetDataOrErr = TargetFunc->getData();
+      assert(TargetDataOrErr && "function data is not available");
+      TargetFunctionData = *TargetDataOrErr;
+    }
+
+    MCInst TargetInst;
+    uint64_t TargetInstSize;
+    if (!BC.SymbolicDisAsm->getInstruction(
+            TargetInst, TargetInstSize, TargetFunctionData.slice(TargetOffset),
+            TargetAddress, nulls())) {
+      // If the target address cannot be disassembled well,
+      // it implies a corrupted control flow.
+      BC.errs() << "BOLT-WARNING: direct branch/call at 0x"
+                << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
+                << *this << " targets an invalid instruction at 0x"
+                << Twine::utohexstr(TargetAddress) << "\n";
+      return false;
+    }
+  }
+
+  return true;
+}
+
 Error BinaryFunction::disassemble() {
   NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                      "Build Binary Functions", opts::TimeBuild);
@@ -1396,6 +1431,11 @@ Error BinaryFunction::disassemble() {
       uint64_t TargetAddress = 0;
       if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                               TargetAddress)) {
+        if (!validateBranchTarget(TargetAddress, AbsoluteInstrAddr,
+                                  FunctionData)) {
+          setIgnored();
+          break;
+        }
         // Check if the target is within the same function. Otherwise it's
         // a call, possibly a tail call.
         //

>From 7091ba817ab8683ca80363a870af41635242791a Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Fri, 14 Nov 2025 15:41:47 +0800
Subject: [PATCH 2/4] move validation after disassembler

---
 bolt/include/bolt/Core/BinaryFunction.h | 17 +++--
 bolt/lib/Core/BinaryContext.cpp         |  3 +
 bolt/lib/Core/BinaryFunction.cpp        | 96 ++++++++++++++-----------
 bolt/lib/Rewrite/RewriteInstance.cpp    |  1 +
 bolt/test/X86/validate-branch-target.s  | 47 ++++++++++++
 5 files changed, 117 insertions(+), 47 deletions(-)
 create mode 100644 bolt/test/X86/validate-branch-target.s

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index b41c35c86bb00..e20319f64eaae 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2264,13 +2264,6 @@ class BinaryFunction {
   /// it is probably another function.
   bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;
 
-  /// Validates if the target of a direct branch/call is a valid
-  /// executable instruction.
-  /// Return true if the target is valid, false otherwise.
-  bool validateBranchTarget(uint64_t TargetAddress,
-                            uint64_t AbsoluteInstrAddr,
-                            const ArrayRef<uint8_t> &CurrentFunctionData);
-
   /// Disassemble function from raw data.
   /// If successful, this function will populate the list of instructions
   /// for this function together with offsets from the function start
@@ -2327,6 +2320,16 @@ class BinaryFunction {
   /// zero-value bytes.
   bool isZeroPaddingAt(uint64_t Offset) const;
 
+  /// Validates if the target of an external direct branch/call is a valid
+  /// executable instruction.
+  /// Return true if the target is valid, false otherwise.
+  bool validateExternalBranch(uint64_t TargetAddress);
+
+  /// Validates if the target of an internal direct branch/call is a valid
+  /// executable instruction.
+  /// Return true if the target is valid, false otherwise.
+  bool validateInternalBranch();
+
   /// Check that entry points have an associated instruction at their
   /// offsets after disassembly.
   void postProcessEntryPoints();
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 6a285f5538dbd..0caaba9a28b99 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1421,6 +1421,9 @@ void BinaryContext::processInterproceduralReferences() {
     if (&Function == TargetFunction)
       continue;
 
+    if (!Function.validateExternalBranch(Address))
+      continue;
+
     if (TargetFunction) {
       if (TargetFunction->isFragment() &&
           !areRelatedFragments(TargetFunction, &Function)) {
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index a835af0d1d5a6..43e72a17702e1 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1283,41 +1283,6 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
   return std::nullopt;
 }
 
-bool BinaryFunction::validateBranchTarget(
-    uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
-    const ArrayRef<uint8_t> &CurrentFunctionData) {
-  if (auto *TargetFunc =
-          BC.getBinaryFunctionContainingAddress(TargetAddress)) {
-    const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
-    ArrayRef<uint8_t> TargetFunctionData;
-    // Check if the target address is within the current function.
-    if (TargetFunc == this) {
-      TargetFunctionData = CurrentFunctionData;
-    } else {
-      // external call/branch, fetch the binary data for target
-      ErrorOr<ArrayRef<uint8_t>> TargetDataOrErr = TargetFunc->getData();
-      assert(TargetDataOrErr && "function data is not available");
-      TargetFunctionData = *TargetDataOrErr;
-    }
-
-    MCInst TargetInst;
-    uint64_t TargetInstSize;
-    if (!BC.SymbolicDisAsm->getInstruction(
-            TargetInst, TargetInstSize, TargetFunctionData.slice(TargetOffset),
-            TargetAddress, nulls())) {
-      // If the target address cannot be disassembled well,
-      // it implies a corrupted control flow.
-      BC.errs() << "BOLT-WARNING: direct branch/call at 0x"
-                << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
-                << *this << " targets an invalid instruction at 0x"
-                << Twine::utohexstr(TargetAddress) << "\n";
-      return false;
-    }
-  }
-
-  return true;
-}
-
 Error BinaryFunction::disassemble() {
   NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                      "Build Binary Functions", opts::TimeBuild);
@@ -1431,11 +1396,6 @@ Error BinaryFunction::disassemble() {
       uint64_t TargetAddress = 0;
       if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                               TargetAddress)) {
-        if (!validateBranchTarget(TargetAddress, AbsoluteInstrAddr,
-                                  FunctionData)) {
-          setIgnored();
-          break;
-        }
         // Check if the target is within the same function. Otherwise it's
         // a call, possibly a tail call.
         //
@@ -1948,6 +1908,62 @@ bool BinaryFunction::scanExternalRefs() {
   return Success;
 }
 
+bool BinaryFunction::validateExternalBranch(uint64_t TargetAddress) {
+  if (!isSimple())
+    return true;
+
+  BinaryFunction *TargetFunction =
+      BC.getBinaryFunctionContainingAddress(TargetAddress);
+
+  bool IsValid = true;
+
+  if (TargetFunction) {
+    const uint64_t TargetOffset = TargetAddress - TargetFunction->getAddress();
+    if (TargetFunction->CurrentState == State::Disassembled &&
+        !TargetFunction->getInstructionAtOffset(TargetOffset))
+      IsValid = false;
+  } else {
+    if (!BC.getSectionForAddress(TargetAddress))
+      IsValid = false;
+  }
+
+  if (!IsValid) {
+    setIgnored();
+    BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
+              << *this
+              << ", an external branch/call targets an invalid instruction "
+              << "at address 0x" << Twine::utohexstr(TargetAddress) << "\n";
+    return false;
+  }
+
+  return true;
+}
+
+bool BinaryFunction::validateInternalBranch() {
+  if (!isSimple())
+    return true;
+
+  for (const auto &KV : Labels) {
+    MCSymbol *Label = KV.second;
+    if (getSecondaryEntryPointSymbol(Label))
+      continue;
+
+    const uint32_t Offset = KV.first;
+
+    if (getInstructionAtOffset(Offset) || !Offset)
+      continue;
+
+    BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
+              << *this << ", an internal branch/call targets an invalid "
+              << "instruction at address 0x"
+              << Twine::utohexstr(getAddress() + Offset) << "\n";
+    setIgnored();
+    return false;
+  }
+
+  return true;
+}
+
 void BinaryFunction::postProcessEntryPoints() {
   if (!isSimple())
     return;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 8d6731e7540a8..db85d047711ba 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3498,6 +3498,7 @@ void RewriteInstance::disassembleFunctions() {
     if (!shouldDisassemble(Function))
       continue;
 
+    Function.validateInternalBranch();
     Function.postProcessEntryPoints();
     Function.postProcessJumpTables();
   }
diff --git a/bolt/test/X86/validate-branch-target.s b/bolt/test/X86/validate-branch-target.s
new file mode 100644
index 0000000000000..c0493ad74ae0f
--- /dev/null
+++ b/bolt/test/X86/validate-branch-target.s
@@ -0,0 +1,47 @@
+## Test that BOLT errs when detecting the target 
+## of a direct call/branch is a invalid instruction
+
+# REQUIRES: system-linux
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o main.o
+# RUN: %clang %cflags -pie -Wl,-q %t/main.o -o main.exe
+# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt -lite=0 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
+
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function external_corrcupt, an external branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function internal_corrcupt, an internal branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
+
+
+.globl	internal_corrcupt
+.type	internal_corrcupt, at function
+.align	16
+internal_corrcupt:
+	leaq	.Lopts_1(%rip),%rax
+	addq	$25,%rax
+	.byte	0xf3,0xc3
+.L8xchar_1:
+	addq	$12,%rax
+.Ldone_1:
+	.byte	0xf3,0xc3
+.align	64
+.Lopts_1:
+.byte	114,1,52,40,56,120,44,105,110,116,41,0  # data '114' will be disassembled as 'jb', check for internal branch: jb + 0x1
+.align	64
+.size	internal_corrcupt,.-internal_corrcupt
+
+
+.globl	external_corrcupt
+.type	external_corrcupt, at function
+.align	16
+external_corrcupt:
+	leaq	.Lopts_2(%rip),%rax
+	addq	$25,%rax
+	.byte	0xf3,0xc3
+.L8xchar_2:
+	addq	$12,%rax
+.Ldone_2:
+	.byte	0xf3,0xc3
+.align	64
+.Lopts_2:
+.byte	114,99,52,40,56,120,44,99,104,97,114,41,0  # data '114' will be disassembled as 'jb', check for external branch: jb + 0x63
+.align	64
+.size	external_corrcupt,.-external_corrcupt

>From de3f33140392b791189fdaee5ecb1a8da9d5ee4c Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Fri, 14 Nov 2025 17:12:15 +0800
Subject: [PATCH 3/4] add offset check

---
 bolt/lib/Core/BinaryContext.cpp  |  6 +++---
 bolt/lib/Core/BinaryFunction.cpp | 11 ++++++++++-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 0caaba9a28b99..448e3eb46c6b4 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1421,9 +1421,6 @@ void BinaryContext::processInterproceduralReferences() {
     if (&Function == TargetFunction)
       continue;
 
-    if (!Function.validateExternalBranch(Address))
-      continue;
-
     if (TargetFunction) {
       if (TargetFunction->isFragment() &&
           !areRelatedFragments(TargetFunction, &Function)) {
@@ -1441,6 +1438,9 @@ void BinaryContext::processInterproceduralReferences() {
       continue;
     }
 
+    if (!Function.validateExternalBranch(Address))
+      continue;
+
     // Check if address falls in function padding space - this could be
     // unmarked data in code. In this case adjust the padding space size.
     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 43e72a17702e1..6e616362ba1a8 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1919,6 +1919,11 @@ bool BinaryFunction::validateExternalBranch(uint64_t TargetAddress) {
 
   if (TargetFunction) {
     const uint64_t TargetOffset = TargetAddress - TargetFunction->getAddress();
+    // Skip empty functions and out-of-bounds offsets,
+    // as they may not be disassembled.
+    if (!TargetOffset || (TargetOffset > TargetFunction->getSize()))
+      return true;
+
     if (TargetFunction->CurrentState == State::Disassembled &&
         !TargetFunction->getInstructionAtOffset(TargetOffset))
       IsValid = false;
@@ -1949,8 +1954,12 @@ bool BinaryFunction::validateInternalBranch() {
       continue;
 
     const uint32_t Offset = KV.first;
+    // Skip empty functions and out-of-bounds offsets,
+    // as they may not be disassembled.
+    if (!Offset || (Offset > getSize()))
+      continue;
 
-    if (getInstructionAtOffset(Offset) || !Offset)
+    if (getInstructionAtOffset(Offset))
       continue;
 
     BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "

>From 517944f6fa86eb2ec3dcd35b6619869618941e97 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Mon, 1 Dec 2025 21:47:00 +0800
Subject: [PATCH 4/4] add aarch64 test case

---
 bolt/lib/Core/BinaryFunction.cpp           | 24 ++++++++--------
 bolt/test/AArch64/validate-branch-target.s | 33 ++++++++++++++++++++++
 2 files changed, 46 insertions(+), 11 deletions(-)
 create mode 100644 bolt/test/AArch64/validate-branch-target.s

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 6e616362ba1a8..47a1c506d8fbd 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1044,8 +1044,10 @@ MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address) {
 
   // For AArch64, check if this address is part of a constant island.
   if (BC.isAArch64()) {
-    if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
+    if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address)) {
+      Labels[Offset] = IslandSym;
       return IslandSym;
+    }
   }
 
   if (Offset == getSize())
@@ -1925,7 +1927,8 @@ bool BinaryFunction::validateExternalBranch(uint64_t TargetAddress) {
       return true;
 
     if (TargetFunction->CurrentState == State::Disassembled &&
-        !TargetFunction->getInstructionAtOffset(TargetOffset))
+        (!TargetFunction->getInstructionAtOffset(TargetOffset) ||
+         getSizeOfDataInCodeAt(TargetOffset)))
       IsValid = false;
   } else {
     if (!BC.getSectionForAddress(TargetAddress))
@@ -1959,15 +1962,14 @@ bool BinaryFunction::validateInternalBranch() {
     if (!Offset || (Offset > getSize()))
       continue;
 
-    if (getInstructionAtOffset(Offset))
-      continue;
-
-    BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
-              << *this << ", an internal branch/call targets an invalid "
-              << "instruction at address 0x"
-              << Twine::utohexstr(getAddress() + Offset) << "\n";
-    setIgnored();
-    return false;
+    if (!getInstructionAtOffset(Offset) || getSizeOfDataInCodeAt(Offset)) {
+      BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
+                << *this << ", an internal branch/call targets an invalid "
+                << "instruction at address 0x"
+                << Twine::utohexstr(getAddress() + Offset) << "\n";
+      setIgnored();
+      return false;
+    }
   }
 
   return true;
diff --git a/bolt/test/AArch64/validate-branch-target.s b/bolt/test/AArch64/validate-branch-target.s
new file mode 100644
index 0000000000000..935663d090c10
--- /dev/null
+++ b/bolt/test/AArch64/validate-branch-target.s
@@ -0,0 +1,33 @@
+## Test that BOLT errs when detecting the target 
+## of a direct call/branch is a invalid instruction
+
+# REQUIRES: system-linux
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-linux %s -o main.o
+# RUN: %clang %cflags %t/main.o -o main.exe -Wl,-q
+# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt -lite=0 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
+
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function external_corrupt, an external branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function internal_corrupt, an internal branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
+
+
+.globl  internal_corrupt
+.type   internal_corrupt, at function
+internal_corrupt:
+    ret
+    nop
+.Lfake_branch_1:
+    .inst 0x14000001  // Opcode 0x14=b, check for internal branch: b + 0x4
+.Lgarbage_1:
+    .word 0xffffffff
+.size   internal_corrupt,.-internal_corrupt
+
+
+.globl  external_corrupt
+.type   external_corrupt, at function
+external_corrupt:
+    ret
+    nop
+.Lfake_branch_2:
+    .inst   0x14000004  // Opcode 0x14=b, check for external branch: b + 0xf
+.size   external_corrupt,.-external_corrupt



More information about the llvm-commits mailing list