[llvm] [BOLT] Add validation for direct call/branch targets, bypassing invalid functions (PR #165406)

Jinjie Huang via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 5 10:55:53 PST 2025


https://github.com/Jinjie-Huang updated https://github.com/llvm/llvm-project/pull/165406

>From 6727406699f96b56973c7794e99c7ce5baf035b5 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Tue, 28 Oct 2025 22:17:14 +0800
Subject: [PATCH 1/7] validate direct branch target

---
 bolt/include/bolt/Core/BinaryFunction.h |  7 +++++
 bolt/lib/Core/BinaryFunction.cpp        | 40 +++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index a720c6af216d7..b41c35c86bb00 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2264,6 +2264,13 @@ class BinaryFunction {
   /// it is probably another function.
   bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;
 
+  /// Validates if the target of a direct branch/call is a valid
+  /// executable instruction.
+  /// Return true if the target is valid, false otherwise.
+  bool validateBranchTarget(uint64_t TargetAddress,
+                            uint64_t AbsoluteInstrAddr,
+                            const ArrayRef<uint8_t> &CurrentFunctionData);
+
   /// Disassemble function from raw data.
   /// If successful, this function will populate the list of instructions
   /// for this function together with offsets from the function start
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index a0d8385aa3824..a835af0d1d5a6 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1283,6 +1283,41 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
   return std::nullopt;
 }
 
+bool BinaryFunction::validateBranchTarget(
+    uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
+    const ArrayRef<uint8_t> &CurrentFunctionData) {
+  if (auto *TargetFunc =
+          BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+    const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
+    ArrayRef<uint8_t> TargetFunctionData;
+    // Check if the target address is within the current function.
+    if (TargetFunc == this) {
+      TargetFunctionData = CurrentFunctionData;
+    } else {
+      // external call/branch, fetch the binary data for target
+      ErrorOr<ArrayRef<uint8_t>> TargetDataOrErr = TargetFunc->getData();
+      assert(TargetDataOrErr && "function data is not available");
+      TargetFunctionData = *TargetDataOrErr;
+    }
+
+    MCInst TargetInst;
+    uint64_t TargetInstSize;
+    if (!BC.SymbolicDisAsm->getInstruction(
+            TargetInst, TargetInstSize, TargetFunctionData.slice(TargetOffset),
+            TargetAddress, nulls())) {
+      // If the target address cannot be disassembled well,
+      // it implies a corrupted control flow.
+      BC.errs() << "BOLT-WARNING: direct branch/call at 0x"
+                << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
+                << *this << " targets an invalid instruction at 0x"
+                << Twine::utohexstr(TargetAddress) << "\n";
+      return false;
+    }
+  }
+
+  return true;
+}
+
 Error BinaryFunction::disassemble() {
   NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                      "Build Binary Functions", opts::TimeBuild);
@@ -1396,6 +1431,11 @@ Error BinaryFunction::disassemble() {
       uint64_t TargetAddress = 0;
       if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                               TargetAddress)) {
+        if (!validateBranchTarget(TargetAddress, AbsoluteInstrAddr,
+                                  FunctionData)) {
+          setIgnored();
+          break;
+        }
         // Check if the target is within the same function. Otherwise it's
         // a call, possibly a tail call.
         //

>From 7091ba817ab8683ca80363a870af41635242791a Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Fri, 14 Nov 2025 15:41:47 +0800
Subject: [PATCH 2/7] move validation after disassembler

---
 bolt/include/bolt/Core/BinaryFunction.h | 17 +++--
 bolt/lib/Core/BinaryContext.cpp         |  3 +
 bolt/lib/Core/BinaryFunction.cpp        | 96 ++++++++++++++-----------
 bolt/lib/Rewrite/RewriteInstance.cpp    |  1 +
 bolt/test/X86/validate-branch-target.s  | 47 ++++++++++++
 5 files changed, 117 insertions(+), 47 deletions(-)
 create mode 100644 bolt/test/X86/validate-branch-target.s

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index b41c35c86bb00..e20319f64eaae 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2264,13 +2264,6 @@ class BinaryFunction {
   /// it is probably another function.
   bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;
 
-  /// Validates if the target of a direct branch/call is a valid
-  /// executable instruction.
-  /// Return true if the target is valid, false otherwise.
-  bool validateBranchTarget(uint64_t TargetAddress,
-                            uint64_t AbsoluteInstrAddr,
-                            const ArrayRef<uint8_t> &CurrentFunctionData);
-
   /// Disassemble function from raw data.
   /// If successful, this function will populate the list of instructions
   /// for this function together with offsets from the function start
@@ -2327,6 +2320,16 @@ class BinaryFunction {
   /// zero-value bytes.
   bool isZeroPaddingAt(uint64_t Offset) const;
 
+  /// Validates if the target of an external direct branch/call is a valid
+  /// executable instruction.
+  /// Return true if the target is valid, false otherwise.
+  bool validateExternalBranch(uint64_t TargetAddress);
+
+  /// Validates if the target of an internal direct branch/call is a valid
+  /// executable instruction.
+  /// Return true if the target is valid, false otherwise.
+  bool validateInternalBranch();
+
   /// Check that entry points have an associated instruction at their
   /// offsets after disassembly.
   void postProcessEntryPoints();
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 6a285f5538dbd..0caaba9a28b99 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1421,6 +1421,9 @@ void BinaryContext::processInterproceduralReferences() {
     if (&Function == TargetFunction)
       continue;
 
+    if (!Function.validateExternalBranch(Address))
+      continue;
+
     if (TargetFunction) {
       if (TargetFunction->isFragment() &&
           !areRelatedFragments(TargetFunction, &Function)) {
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index a835af0d1d5a6..43e72a17702e1 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1283,41 +1283,6 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
   return std::nullopt;
 }
 
-bool BinaryFunction::validateBranchTarget(
-    uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
-    const ArrayRef<uint8_t> &CurrentFunctionData) {
-  if (auto *TargetFunc =
-          BC.getBinaryFunctionContainingAddress(TargetAddress)) {
-    const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
-    ArrayRef<uint8_t> TargetFunctionData;
-    // Check if the target address is within the current function.
-    if (TargetFunc == this) {
-      TargetFunctionData = CurrentFunctionData;
-    } else {
-      // external call/branch, fetch the binary data for target
-      ErrorOr<ArrayRef<uint8_t>> TargetDataOrErr = TargetFunc->getData();
-      assert(TargetDataOrErr && "function data is not available");
-      TargetFunctionData = *TargetDataOrErr;
-    }
-
-    MCInst TargetInst;
-    uint64_t TargetInstSize;
-    if (!BC.SymbolicDisAsm->getInstruction(
-            TargetInst, TargetInstSize, TargetFunctionData.slice(TargetOffset),
-            TargetAddress, nulls())) {
-      // If the target address cannot be disassembled well,
-      // it implies a corrupted control flow.
-      BC.errs() << "BOLT-WARNING: direct branch/call at 0x"
-                << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
-                << *this << " targets an invalid instruction at 0x"
-                << Twine::utohexstr(TargetAddress) << "\n";
-      return false;
-    }
-  }
-
-  return true;
-}
-
 Error BinaryFunction::disassemble() {
   NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                      "Build Binary Functions", opts::TimeBuild);
@@ -1431,11 +1396,6 @@ Error BinaryFunction::disassemble() {
       uint64_t TargetAddress = 0;
       if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                               TargetAddress)) {
-        if (!validateBranchTarget(TargetAddress, AbsoluteInstrAddr,
-                                  FunctionData)) {
-          setIgnored();
-          break;
-        }
         // Check if the target is within the same function. Otherwise it's
         // a call, possibly a tail call.
         //
@@ -1948,6 +1908,62 @@ bool BinaryFunction::scanExternalRefs() {
   return Success;
 }
 
+bool BinaryFunction::validateExternalBranch(uint64_t TargetAddress) {
+  if (!isSimple())
+    return true;
+
+  BinaryFunction *TargetFunction =
+      BC.getBinaryFunctionContainingAddress(TargetAddress);
+
+  bool IsValid = true;
+
+  if (TargetFunction) {
+    const uint64_t TargetOffset = TargetAddress - TargetFunction->getAddress();
+    if (TargetFunction->CurrentState == State::Disassembled &&
+        !TargetFunction->getInstructionAtOffset(TargetOffset))
+      IsValid = false;
+  } else {
+    if (!BC.getSectionForAddress(TargetAddress))
+      IsValid = false;
+  }
+
+  if (!IsValid) {
+    setIgnored();
+    BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
+              << *this
+              << ", an external branch/call targets an invalid instruction "
+              << "at address 0x" << Twine::utohexstr(TargetAddress) << "\n";
+    return false;
+  }
+
+  return true;
+}
+
+bool BinaryFunction::validateInternalBranch() {
+  if (!isSimple())
+    return true;
+
+  for (const auto &KV : Labels) {
+    MCSymbol *Label = KV.second;
+    if (getSecondaryEntryPointSymbol(Label))
+      continue;
+
+    const uint32_t Offset = KV.first;
+
+    if (getInstructionAtOffset(Offset) || !Offset)
+      continue;
+
+    BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
+              << *this << ", an internal branch/call targets an invalid "
+              << "instruction at address 0x"
+              << Twine::utohexstr(getAddress() + Offset) << "\n";
+    setIgnored();
+    return false;
+  }
+
+  return true;
+}
+
 void BinaryFunction::postProcessEntryPoints() {
   if (!isSimple())
     return;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 8d6731e7540a8..db85d047711ba 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3498,6 +3498,7 @@ void RewriteInstance::disassembleFunctions() {
     if (!shouldDisassemble(Function))
       continue;
 
+    Function.validateInternalBranch();
     Function.postProcessEntryPoints();
     Function.postProcessJumpTables();
   }
diff --git a/bolt/test/X86/validate-branch-target.s b/bolt/test/X86/validate-branch-target.s
new file mode 100644
index 0000000000000..c0493ad74ae0f
--- /dev/null
+++ b/bolt/test/X86/validate-branch-target.s
@@ -0,0 +1,47 @@
+## Test that BOLT errs when detecting the target 
+## of a direct call/branch is a invalid instruction
+
+# REQUIRES: system-linux
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o main.o
+# RUN: %clang %cflags -pie -Wl,-q %t/main.o -o main.exe
+# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt -lite=0 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
+
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function external_corrcupt, an external branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function internal_corrcupt, an internal branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
+
+
+.globl	internal_corrcupt
+.type	internal_corrcupt, at function
+.align	16
+internal_corrcupt:
+	leaq	.Lopts_1(%rip),%rax
+	addq	$25,%rax
+	.byte	0xf3,0xc3
+.L8xchar_1:
+	addq	$12,%rax
+.Ldone_1:
+	.byte	0xf3,0xc3
+.align	64
+.Lopts_1:
+.byte	114,1,52,40,56,120,44,105,110,116,41,0  # data '114' will be disassembled as 'jb', check for internal branch: jb + 0x1
+.align	64
+.size	internal_corrcupt,.-internal_corrcupt
+
+
+.globl	external_corrcupt
+.type	external_corrcupt, at function
+.align	16
+external_corrcupt:
+	leaq	.Lopts_2(%rip),%rax
+	addq	$25,%rax
+	.byte	0xf3,0xc3
+.L8xchar_2:
+	addq	$12,%rax
+.Ldone_2:
+	.byte	0xf3,0xc3
+.align	64
+.Lopts_2:
+.byte	114,99,52,40,56,120,44,99,104,97,114,41,0  # data '114' will be disassembled as 'jb', check for external branch: jb + 0x63
+.align	64
+.size	external_corrcupt,.-external_corrcupt

>From de3f33140392b791189fdaee5ecb1a8da9d5ee4c Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Fri, 14 Nov 2025 17:12:15 +0800
Subject: [PATCH 3/7] add offset check

---
 bolt/lib/Core/BinaryContext.cpp  |  6 +++---
 bolt/lib/Core/BinaryFunction.cpp | 11 ++++++++++-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 0caaba9a28b99..448e3eb46c6b4 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1421,9 +1421,6 @@ void BinaryContext::processInterproceduralReferences() {
     if (&Function == TargetFunction)
       continue;
 
-    if (!Function.validateExternalBranch(Address))
-      continue;
-
     if (TargetFunction) {
       if (TargetFunction->isFragment() &&
           !areRelatedFragments(TargetFunction, &Function)) {
@@ -1441,6 +1438,9 @@ void BinaryContext::processInterproceduralReferences() {
       continue;
     }
 
+    if (!Function.validateExternalBranch(Address))
+      continue;
+
     // Check if address falls in function padding space - this could be
     // unmarked data in code. In this case adjust the padding space size.
     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 43e72a17702e1..6e616362ba1a8 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1919,6 +1919,11 @@ bool BinaryFunction::validateExternalBranch(uint64_t TargetAddress) {
 
   if (TargetFunction) {
     const uint64_t TargetOffset = TargetAddress - TargetFunction->getAddress();
+    // Skip empty functions and out-of-bounds offsets,
+    // as they may not be disassembled.
+    if (!TargetOffset || (TargetOffset > TargetFunction->getSize()))
+      return true;
+
     if (TargetFunction->CurrentState == State::Disassembled &&
         !TargetFunction->getInstructionAtOffset(TargetOffset))
       IsValid = false;
@@ -1949,8 +1954,12 @@ bool BinaryFunction::validateInternalBranch() {
       continue;
 
     const uint32_t Offset = KV.first;
+    // Skip empty functions and out-of-bounds offsets,
+    // as they may not be disassembled.
+    if (!Offset || (Offset > getSize()))
+      continue;
 
-    if (getInstructionAtOffset(Offset) || !Offset)
+    if (getInstructionAtOffset(Offset))
       continue;
 
     BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "

>From 5bfce8bb524383a23371a2437a317ad5543ddf0e Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Mon, 1 Dec 2025 21:45:05 +0800
Subject: [PATCH 4/7] add aarch64 test case

---
 bolt/lib/Core/BinaryFunction.cpp | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 6e616362ba1a8..47a1c506d8fbd 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1044,8 +1044,10 @@ MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address) {
 
   // For AArch64, check if this address is part of a constant island.
   if (BC.isAArch64()) {
-    if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
+    if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address)) {
+      Labels[Offset] = IslandSym;
       return IslandSym;
+    }
   }
 
   if (Offset == getSize())
@@ -1925,7 +1927,8 @@ bool BinaryFunction::validateExternalBranch(uint64_t TargetAddress) {
       return true;
 
     if (TargetFunction->CurrentState == State::Disassembled &&
-        !TargetFunction->getInstructionAtOffset(TargetOffset))
+        (!TargetFunction->getInstructionAtOffset(TargetOffset) ||
+         getSizeOfDataInCodeAt(TargetOffset)))
       IsValid = false;
   } else {
     if (!BC.getSectionForAddress(TargetAddress))
@@ -1959,15 +1962,14 @@ bool BinaryFunction::validateInternalBranch() {
     if (!Offset || (Offset > getSize()))
       continue;
 
-    if (getInstructionAtOffset(Offset))
-      continue;
-
-    BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
-              << *this << ", an internal branch/call targets an invalid "
-              << "instruction at address 0x"
-              << Twine::utohexstr(getAddress() + Offset) << "\n";
-    setIgnored();
-    return false;
+    if (!getInstructionAtOffset(Offset) || getSizeOfDataInCodeAt(Offset)) {
+      BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
+                << *this << ", an internal branch/call targets an invalid "
+                << "instruction at address 0x"
+                << Twine::utohexstr(getAddress() + Offset) << "\n";
+      setIgnored();
+      return false;
+    }
   }
 
   return true;

>From 5265b3329129f38b4de6cf792a6d4e6baf9cbf2b Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Mon, 1 Dec 2025 22:09:04 +0800
Subject: [PATCH 5/7] add aarch64 test case

---
 bolt/lib/Core/BinaryFunction.cpp              | 24 +++++++-------
 bolt/test/AArch64/constant-island-alignment.s |  1 +
 bolt/test/AArch64/validate-branch-target.s    | 33 +++++++++++++++++++
 bolt/test/X86/validate-branch-target.s        | 20 +++++------
 4 files changed, 57 insertions(+), 21 deletions(-)
 create mode 100644 bolt/test/AArch64/validate-branch-target.s

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 6e616362ba1a8..47a1c506d8fbd 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1044,8 +1044,10 @@ MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address) {
 
   // For AArch64, check if this address is part of a constant island.
   if (BC.isAArch64()) {
-    if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
+    if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address)) {
+      Labels[Offset] = IslandSym;
       return IslandSym;
+    }
   }
 
   if (Offset == getSize())
@@ -1925,7 +1927,8 @@ bool BinaryFunction::validateExternalBranch(uint64_t TargetAddress) {
       return true;
 
     if (TargetFunction->CurrentState == State::Disassembled &&
-        !TargetFunction->getInstructionAtOffset(TargetOffset))
+        (!TargetFunction->getInstructionAtOffset(TargetOffset) ||
+         getSizeOfDataInCodeAt(TargetOffset)))
       IsValid = false;
   } else {
     if (!BC.getSectionForAddress(TargetAddress))
@@ -1959,15 +1962,14 @@ bool BinaryFunction::validateInternalBranch() {
     if (!Offset || (Offset > getSize()))
       continue;
 
-    if (getInstructionAtOffset(Offset))
-      continue;
-
-    BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
-              << *this << ", an internal branch/call targets an invalid "
-              << "instruction at address 0x"
-              << Twine::utohexstr(getAddress() + Offset) << "\n";
-    setIgnored();
-    return false;
+    if (!getInstructionAtOffset(Offset) || getSizeOfDataInCodeAt(Offset)) {
+      BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
+                << *this << ", an internal branch/call targets an invalid "
+                << "instruction at address 0x"
+                << Twine::utohexstr(getAddress() + Offset) << "\n";
+      setIgnored();
+      return false;
+    }
   }
 
   return true;
diff --git a/bolt/test/AArch64/constant-island-alignment.s b/bolt/test/AArch64/constant-island-alignment.s
index 99fe7333e2dba..5462dcb08f82f 100644
--- a/bolt/test/AArch64/constant-island-alignment.s
+++ b/bolt/test/AArch64/constant-island-alignment.s
@@ -53,6 +53,7 @@ _start:
   blr x0
   mov x0, #1
   ret
+.size _start,.-_start
   nop
 # CHECK: {{0|8}} <$d>:
 .Lci:
diff --git a/bolt/test/AArch64/validate-branch-target.s b/bolt/test/AArch64/validate-branch-target.s
new file mode 100644
index 0000000000000..935663d090c10
--- /dev/null
+++ b/bolt/test/AArch64/validate-branch-target.s
@@ -0,0 +1,33 @@
+## Test that BOLT errs when detecting the target 
+## of a direct call/branch is a invalid instruction
+
+# REQUIRES: system-linux
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-linux %s -o main.o
+# RUN: %clang %cflags %t/main.o -o main.exe -Wl,-q
+# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt -lite=0 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
+
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function external_corrupt, an external branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function internal_corrupt, an internal branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
+
+
+.globl  internal_corrupt
+.type   internal_corrupt, at function
+internal_corrupt:
+    ret
+    nop
+.Lfake_branch_1:
+    .inst 0x14000001  // Opcode 0x14=b, check for internal branch: b + 0x4
+.Lgarbage_1:
+    .word 0xffffffff
+.size   internal_corrupt,.-internal_corrupt
+
+
+.globl  external_corrupt
+.type   external_corrupt, at function
+external_corrupt:
+    ret
+    nop
+.Lfake_branch_2:
+    .inst   0x14000004  // Opcode 0x14=b, check for external branch: b + 0xf
+.size   external_corrupt,.-external_corrupt
diff --git a/bolt/test/X86/validate-branch-target.s b/bolt/test/X86/validate-branch-target.s
index c0493ad74ae0f..2c61ccca90da8 100644
--- a/bolt/test/X86/validate-branch-target.s
+++ b/bolt/test/X86/validate-branch-target.s
@@ -7,14 +7,14 @@
 # RUN: %clang %cflags -pie -Wl,-q %t/main.o -o main.exe
 # RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt -lite=0 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
 
-# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function external_corrcupt, an external branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
-# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function internal_corrcupt, an internal branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function external_corrupt, an external branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function internal_corrupt, an internal branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}
 
 
-.globl	internal_corrcupt
-.type	internal_corrcupt, at function
+.globl	internal_corrupt
+.type	internal_corrupt, at function
 .align	16
-internal_corrcupt:
+internal_corrupt:
 	leaq	.Lopts_1(%rip),%rax
 	addq	$25,%rax
 	.byte	0xf3,0xc3
@@ -26,13 +26,13 @@ internal_corrcupt:
 .Lopts_1:
 .byte	114,1,52,40,56,120,44,105,110,116,41,0  # data '114' will be disassembled as 'jb', check for internal branch: jb + 0x1
 .align	64
-.size	internal_corrcupt,.-internal_corrcupt
+.size	internal_corrupt,.-internal_corrupt
 
 
-.globl	external_corrcupt
-.type	external_corrcupt, at function
+.globl	external_corrupt
+.type	external_corrupt, at function
 .align	16
-external_corrcupt:
+external_corrupt:
 	leaq	.Lopts_2(%rip),%rax
 	addq	$25,%rax
 	.byte	0xf3,0xc3
@@ -44,4 +44,4 @@ external_corrcupt:
 .Lopts_2:
 .byte	114,99,52,40,56,120,44,99,104,97,114,41,0  # data '114' will be disassembled as 'jb', check for external branch: jb + 0x63
 .align	64
-.size	external_corrcupt,.-external_corrcupt
+.size	external_corrupt,.-external_corrupt

>From 7cdb17514d8eff8576d9acb105af48ff70fbf0c5 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Thu, 4 Dec 2025 19:59:18 +0800
Subject: [PATCH 6/7] nit: rename to validateInternalBranches()

---
 bolt/include/bolt/Core/BinaryFunction.h | 8 ++++----
 bolt/lib/Core/BinaryFunction.cpp        | 2 +-
 bolt/lib/Rewrite/RewriteInstance.cpp    | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index e20319f64eaae..b9bad31393a1b 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2320,15 +2320,15 @@ class BinaryFunction {
   /// zero-value bytes.
   bool isZeroPaddingAt(uint64_t Offset) const;
 
-  /// Validates if the target of an external direct branch/call is a valid
+  /// Validate if the target of an external direct branch/call is a valid
   /// executable instruction.
   /// Return true if the target is valid, false otherwise.
   bool validateExternalBranch(uint64_t TargetAddress);
 
-  /// Validates if the target of an internal direct branch/call is a valid
+  /// Validate if the target of any internal direct branch/call is a valid
   /// executable instruction.
-  /// Return true if the target is valid, false otherwise.
-  bool validateInternalBranch();
+  /// Return true if all the targets are valid, false otherwise.
+  bool validateInternalBranches();
 
   /// Check that entry points have an associated instruction at their
   /// offsets after disassembly.
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 47a1c506d8fbd..c6462e2a68013 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1947,7 +1947,7 @@ bool BinaryFunction::validateExternalBranch(uint64_t TargetAddress) {
   return true;
 }
 
-bool BinaryFunction::validateInternalBranch() {
+bool BinaryFunction::validateInternalBranches() {
   if (!isSimple())
     return true;
 
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index db85d047711ba..ce3a7ca68ee6e 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3498,7 +3498,7 @@ void RewriteInstance::disassembleFunctions() {
     if (!shouldDisassemble(Function))
       continue;
 
-    Function.validateInternalBranch();
+    Function.validateInternalBranches();
     Function.postProcessEntryPoints();
     Function.postProcessJumpTables();
   }

>From 465d03e62cd52d47703eeffc90e1d07bea0b4d98 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Sat, 6 Dec 2025 02:54:29 +0800
Subject: [PATCH 7/7] refactor external branch and simplify tests

---
 bolt/include/bolt/Core/BinaryFunction.h    |  5 ---
 bolt/lib/Core/BinaryContext.cpp            | 17 ++++++++--
 bolt/lib/Core/BinaryFunction.cpp           | 39 +---------------------
 bolt/test/AArch64/validate-branch-target.s | 21 +++++++-----
 bolt/test/X86/validate-branch-target.s     | 34 ++++++-------------
 5 files changed, 37 insertions(+), 79 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index b9bad31393a1b..3ce9fb14ff87a 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2320,11 +2320,6 @@ class BinaryFunction {
   /// zero-value bytes.
   bool isZeroPaddingAt(uint64_t Offset) const;
 
-  /// Validate if the target of an external direct branch/call is a valid
-  /// executable instruction.
-  /// Return true if the target is valid, false otherwise.
-  bool validateExternalBranch(uint64_t TargetAddress);
-
   /// Validate if the target of any internal direct branch/call is a valid
   /// executable instruction.
   /// Return true if all the targets are valid, false otherwise.
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 448e3eb46c6b4..e80de168714a9 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1431,6 +1431,20 @@ void BinaryContext::processInterproceduralReferences() {
             << TargetFunction->getPrintName() << '\n';
       }
 
+      const uint64_t TargetOffset = Address - TargetFunction->getAddress();
+      if (TargetOffset && (TargetOffset <= TargetFunction->getSize())) {
+        if (TargetFunction->CurrentState ==
+                BinaryFunction::State::Disassembled &&
+            (!TargetFunction->getInstructionAtOffset(TargetOffset) ||
+             TargetFunction->getSizeOfDataInCodeAt(TargetOffset)))
+          this->errs()
+              << "BOLT-WARNING: corrupted control flow detected in function "
+              << Function
+              << ", an external branch/call targets an invalid instruction "
+              << "at address 0x" << Twine::utohexstr(Address) << "\n";
+        Function.setIgnored();
+      }
+
       // Create an extra entry point if needed. Can also render the target
       // function ignored if the reference is invalid.
       handleExternalBranchTarget(Address, *TargetFunction);
@@ -1438,9 +1452,6 @@ void BinaryContext::processInterproceduralReferences() {
       continue;
     }
 
-    if (!Function.validateExternalBranch(Address))
-      continue;
-
     // Check if address falls in function padding space - this could be
     // unmarked data in code. In this case adjust the padding space size.
     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index c6462e2a68013..469af59f4a952 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1910,45 +1910,8 @@ bool BinaryFunction::scanExternalRefs() {
   return Success;
 }
 
-bool BinaryFunction::validateExternalBranch(uint64_t TargetAddress) {
-  if (!isSimple())
-    return true;
-
-  BinaryFunction *TargetFunction =
-      BC.getBinaryFunctionContainingAddress(TargetAddress);
-
-  bool IsValid = true;
-
-  if (TargetFunction) {
-    const uint64_t TargetOffset = TargetAddress - TargetFunction->getAddress();
-    // Skip empty functions and out-of-bounds offsets,
-    // as they may not be disassembled.
-    if (!TargetOffset || (TargetOffset > TargetFunction->getSize()))
-      return true;
-
-    if (TargetFunction->CurrentState == State::Disassembled &&
-        (!TargetFunction->getInstructionAtOffset(TargetOffset) ||
-         getSizeOfDataInCodeAt(TargetOffset)))
-      IsValid = false;
-  } else {
-    if (!BC.getSectionForAddress(TargetAddress))
-      IsValid = false;
-  }
-
-  if (!IsValid) {
-    setIgnored();
-    BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
-              << *this
-              << ", an external branch/call targets an invalid instruction "
-              << "at address 0x" << Twine::utohexstr(TargetAddress) << "\n";
-    return false;
-  }
-
-  return true;
-}
-
 bool BinaryFunction::validateInternalBranches() {
-  if (!isSimple())
+  if (!isSimple() || TrapsOnEntry)
     return true;
 
   for (const auto &KV : Labels) {
diff --git a/bolt/test/AArch64/validate-branch-target.s b/bolt/test/AArch64/validate-branch-target.s
index 935663d090c10..d38e53e77cba1 100644
--- a/bolt/test/AArch64/validate-branch-target.s
+++ b/bolt/test/AArch64/validate-branch-target.s
@@ -14,11 +14,8 @@
 .globl  internal_corrupt
 .type   internal_corrupt, at function
 internal_corrupt:
-    ret
-    nop
-.Lfake_branch_1:
-    .inst 0x14000001  // Opcode 0x14=b, check for internal branch: b + 0x4
-.Lgarbage_1:
+    b constant_island_0  // targeting the data in code
+constant_island_0:
     .word 0xffffffff
 .size   internal_corrupt,.-internal_corrupt
 
@@ -26,8 +23,14 @@ internal_corrupt:
 .globl  external_corrupt
 .type   external_corrupt, at function
 external_corrupt:
-    ret
-    nop
-.Lfake_branch_2:
-    .inst   0x14000004  // Opcode 0x14=b, check for external branch: b + 0xf
+    b   constant_island_1  // targeting the data in code externally
 .size   external_corrupt,.-external_corrupt
+
+.globl  external_func
+.type   external_func, at function
+external_func:
+    add x0, x0, x1
+constant_island_1:
+    .word 0xffffffff // data in code
+    ret
+.size   external_func,.-external_func
diff --git a/bolt/test/X86/validate-branch-target.s b/bolt/test/X86/validate-branch-target.s
index 2c61ccca90da8..11b0a73fc7253 100644
--- a/bolt/test/X86/validate-branch-target.s
+++ b/bolt/test/X86/validate-branch-target.s
@@ -13,35 +13,21 @@
 
 .globl	internal_corrupt
 .type	internal_corrupt, at function
-.align	16
 internal_corrupt:
-	leaq	.Lopts_1(%rip),%rax
-	addq	$25,%rax
-	.byte	0xf3,0xc3
-.L8xchar_1:
-	addq	$12,%rax
-.Ldone_1:
-	.byte	0xf3,0xc3
-.align	64
-.Lopts_1:
-.byte	114,1,52,40,56,120,44,105,110,116,41,0  # data '114' will be disassembled as 'jb', check for internal branch: jb + 0x1
-.align	64
+	jb  data_in_code + 1  # targeting the data in code, and jump into the middle of 'xorb' instruction
+data_in_code:
+	.byte 0x34, 0x01 # data in code, will be disassembled as 'xorb 0x1, %al'
 .size	internal_corrupt,.-internal_corrupt
 
 
 .globl	external_corrupt
 .type	external_corrupt, at function
-.align	16
 external_corrupt:
-	leaq	.Lopts_2(%rip),%rax
-	addq	$25,%rax
-	.byte	0xf3,0xc3
-.L8xchar_2:
-	addq	$12,%rax
-.Ldone_2:
-	.byte	0xf3,0xc3
-.align	64
-.Lopts_2:
-.byte	114,99,52,40,56,120,44,99,104,97,114,41,0  # data '114' will be disassembled as 'jb', check for external branch: jb + 0x63
-.align	64
+	jb  external_func + 1  # targeting the middle of normal instruction externally
 .size	external_corrupt,.-external_corrupt
+
+.globl	external_func
+.type	external_func, at function
+external_func:
+	addq  $1, %rax  # normal instruction
+.size	external_func,.-external_func



More information about the llvm-commits mailing list