[llvm] [BOLT] Properly validate relocations against internals of a function (PR #167451)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 10 21:33:36 PST 2025


https://github.com/maksfb updated https://github.com/llvm/llvm-project/pull/167451

>From b8ece51e2b1666598e60eb0d3ad75f456ad5e7ab Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Wed, 5 Nov 2025 21:16:15 -0800
Subject: [PATCH] [BOLT] Properly validate relocations against internals of a
 function

Validation of data relocations targeting internals of a function was
happening based on offsets inside a function. As a result, if multiple
relocations were targeting the same offset, and one of the relocations
was verified, e.g. as belonging to a jump table, then all relocations
targeting the offset would be considered verified and valid.

Now that we are tracking relocations pointing inside every function, we
can do a better validation based on the location of the relocation.
E.g., if a relocation belongs to a jump table only that relocation will
be accounted for and other relocations pointing to the same address will
be evaluated independently.
---
 bolt/include/bolt/Core/BinaryFunction.h      |  7 +-
 bolt/lib/Core/BinaryFunction.cpp             | 92 ++++++++++----------
 bolt/test/X86/unclaimed-jt-entries.s         |  7 +-
 bolt/test/runtime/X86/unclaimed-jt-entries.s |  9 +-
 4 files changed, 54 insertions(+), 61 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index a720c6af216d7..5eb9de7a1d6e6 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2356,10 +2356,9 @@ class BinaryFunction {
   bool postProcessIndirectBranches(MCPlusBuilder::AllocatorIdTy AllocId);
 
   /// Validate that all data references to function offsets are claimed by
-  /// recognized jump tables. Register externally referenced blocks as entry
-  /// points. Returns true if there are no unclaimed externally referenced
-  /// offsets.
-  bool validateExternallyReferencedOffsets();
+  /// recognized jump tables. Returns true if there are no unclaimed externally
+  /// referenced offsets.
+  bool validateInternalRefDataRelocations();
 
   /// Return all call site profile info for this function.
   IndirectCallSiteProfile &getAllCallSites() { return AllCallSites; }
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index a0d8385aa3824..051de6486f269 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -2049,41 +2049,47 @@ void BinaryFunction::postProcessJumpTables() {
   }
 }
 
-bool BinaryFunction::validateExternallyReferencedOffsets() {
-  SmallPtrSet<MCSymbol *, 4> JTTargets;
-  for (const JumpTable *JT : llvm::make_second_range(JumpTables))
-    JTTargets.insert_range(JT->Entries);
-
-  bool HasUnclaimedReference = false;
-  for (uint64_t Destination : ExternallyReferencedOffsets) {
-    // Ignore __builtin_unreachable().
-    if (Destination == getSize())
-      continue;
-    // Ignore constant islands
-    if (isInConstantIsland(Destination + getAddress()))
-      continue;
+bool BinaryFunction::validateInternalRefDataRelocations() {
+  if (InternalRefDataRelocations.empty())
+    return true;
 
-    if (BinaryBasicBlock *BB = getBasicBlockAtOffset(Destination)) {
-      // Check if the externally referenced offset is a recognized jump table
-      // target.
-      if (JTTargets.contains(BB->getLabel()))
-        continue;
+  // Rely on the user hint that all data refs are valid and only used as
+  // destinations by indirect branch in the same function.
+  if (opts::StrictMode)
+    return true;
 
-      if (opts::Verbosity >= 1) {
-        BC.errs() << "BOLT-WARNING: unclaimed data to code reference (possibly "
-                  << "an unrecognized jump table entry) to " << BB->getName()
-                  << " in " << *this << "\n";
-      }
-      auto L = BC.scopeLock();
-      addEntryPoint(*BB);
-    } else {
-      BC.errs() << "BOLT-WARNING: unknown data to code reference to offset "
-                << Twine::utohexstr(Destination) << " in " << *this << "\n";
-      setIgnored();
+  DenseSet<uint64_t> UnclaimedRelocations(InternalRefDataRelocations);
+  for (const JumpTable *JT : llvm::make_second_range(JumpTables)) {
+    uint64_t EntryAddress = JT->getAddress();
+    while (EntryAddress < JT->getAddress() + JT->getSize()) {
+      UnclaimedRelocations.erase(EntryAddress);
+      EntryAddress += JT->EntrySize;
     }
-    HasUnclaimedReference = true;
   }
-  return !HasUnclaimedReference;
+
+  if (UnclaimedRelocations.empty())
+    return true;
+
+  BC.errs() << "BOLT-WARNING: " << UnclaimedRelocations.size()
+            << " unclaimed data relocation"
+            << (UnclaimedRelocations.size() > 1 ? "s" : "")
+            << " remain against function " << *this;
+  if (opts::Verbosity) {
+    BC.errs() << ":\n";
+    for (uint64_t RelocationAddress : UnclaimedRelocations) {
+      const Relocation *Relocation = BC.getRelocationAt(RelocationAddress);
+      BC.errs() << "  ";
+      if (Relocation)
+        BC.errs() << *Relocation;
+      else
+        BC.errs() << "<missing relocation>";
+      BC.errs() << '\n';
+    }
+  } else {
+    BC.errs() << ". Re-run with -v=1 to see the list\n";
+  }
+
+  return false;
 }
 
 bool BinaryFunction::postProcessIndirectBranches(
@@ -2208,14 +2214,6 @@ bool BinaryFunction::postProcessIndirectBranches(
     LastIndirectJumpBB->updateJumpTableSuccessors();
   }
 
-  // Validate that all data references to function offsets are claimed by
-  // recognized jump tables. Register externally referenced blocks as entry
-  // points.
-  if (!opts::StrictMode && hasInternalReference()) {
-    if (!validateExternallyReferencedOffsets())
-      return false;
-  }
-
   if (HasUnknownControlFlow && !BC.HasRelocations)
     return false;
 
@@ -2504,12 +2502,18 @@ Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
   CurrentState = State::CFG;
 
   // Make any necessary adjustments for indirect branches.
-  if (!postProcessIndirectBranches(AllocatorId)) {
-    if (opts::Verbosity) {
-      BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for "
-                << *this << '\n';
-    }
+  bool ValidCFG = postProcessIndirectBranches(AllocatorId);
+  if (!ValidCFG && opts::Verbosity) {
+    BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for "
+              << *this << '\n';
+  }
+
+  // Validate that all data references to function offsets are claimed by
+  // recognized jump tables.
+  if (ValidCFG)
+    ValidCFG = validateInternalRefDataRelocations();
 
+  if (!ValidCFG) {
     if (BC.isAArch64())
       PreserveNops = BC.HasRelocations;
 
diff --git a/bolt/test/X86/unclaimed-jt-entries.s b/bolt/test/X86/unclaimed-jt-entries.s
index 31b72c47125ae..6cb0b081e2f72 100644
--- a/bolt/test/X86/unclaimed-jt-entries.s
+++ b/bolt/test/X86/unclaimed-jt-entries.s
@@ -32,12 +32,7 @@
 
 # RUN: llvm-bolt %t.exe -v=1 -o %t.out 2>&1 | FileCheck %s
 
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main
-# CHECK: BOLT-WARNING: failed to post-process indirect branches for main
+# CHECK: BOLT-WARNING: 11 unclaimed data relocations remain against function main
 
   .text
   .globl main
diff --git a/bolt/test/runtime/X86/unclaimed-jt-entries.s b/bolt/test/runtime/X86/unclaimed-jt-entries.s
index 1725fb808efbf..943b7e20a19b3 100644
--- a/bolt/test/runtime/X86/unclaimed-jt-entries.s
+++ b/bolt/test/runtime/X86/unclaimed-jt-entries.s
@@ -18,14 +18,9 @@
 
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
 # RUN: %clang %cflags %S/Inputs/unclaimed-jt-entries.c -no-pie %t.o -o %t.exe -Wl,-q
-# RUN: llvm-bolt %t.exe -v=1 -o %t.out --sequential-disassembly 2>&1 | FileCheck %s
+# RUN: llvm-bolt %t.exe -o %t.out 2>&1 | FileCheck %s
 
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in func
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in func
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in func
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in func
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in func
-# CHECK: BOLT-WARNING: failed to post-process indirect branches for func
+# CHECK: BOLT-WARNING: 11 unclaimed data relocations remain against function func
 
 # Run the optimized binary
 # RUN: %t.out 3 | FileCheck %s --check-prefix=CHECK3



More information about the llvm-commits mailing list