[llvm] [BOLT] Validate extra entry point by querying data marker symbols (PR #154611)

YongKang Zhu via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 20 13:46:23 PDT 2025


https://github.com/yozhu created https://github.com/llvm/llvm-project/pull/154611

Look up marker symbols and decide whether candidate is really extra entry
point in `adjustFunctionBoundaries()`.

>From db4cbdbead173a863bffc142e79a1c7c492c2dd2 Mon Sep 17 00:00:00 2001
From: YongKang Zhu <yongzhu at fb.com>
Date: Mon, 18 Aug 2025 14:33:27 -0700
Subject: [PATCH] [BOLT] Validate extra entry point candidate by querying data
 marker symbols

Look up marker symbols and decide whether candidate is really extra entry
point in `adjustFunctionBoundaries()`.
---
 bolt/include/bolt/Core/BinaryFunction.h       |  5 --
 bolt/include/bolt/Rewrite/RewriteInstance.h   |  2 +-
 bolt/lib/Core/BinaryFunction.cpp              |  8 +---
 bolt/lib/Rewrite/RewriteInstance.cpp          | 48 +++++++++----------
 ...data-marker-invalidates-extra-entrypoint.s | 38 +++++++++++++++
 5 files changed, 63 insertions(+), 38 deletions(-)
 create mode 100644 bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index ae580520b9110..b59926cc75571 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -1196,11 +1196,6 @@ class BinaryFunction {
     return getSecondaryEntryPointSymbol(BB.getLabel());
   }
 
-  /// Remove a label from the secondary entry point map.
-  void removeSymbolFromSecondaryEntryPointMap(const MCSymbol *Label) {
-    SecondaryEntryPoints.erase(Label);
-  }
-
   /// Return true if the basic block is an entry point into the function
   /// (either primary or secondary).
   bool isEntryPoint(const BinaryBasicBlock &BB) const {
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 91d62a78de390..19dcce8205ebc 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -241,7 +241,7 @@ class RewriteInstance {
 
   /// Adjust function sizes and set proper maximum size values after the whole
   /// symbol table has been processed.
-  void adjustFunctionBoundaries();
+  void adjustFunctionBoundaries(DenseMap<uint64_t, MarkerSymType> &MarkerSyms);
 
   /// Make .eh_frame section relocatable.
   void relocateEHFrameSection();
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index eec68ff5a5fce..8f494f105fbba 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1915,13 +1915,9 @@ void BinaryFunction::postProcessEntryPoints() {
       continue;
 
     // If we have grabbed a wrong code label which actually points to some
-    // constant island inside the function, ignore this label and remove it
-    // from the secondary entry point map.
-    if (isStartOfConstantIsland(Offset)) {
-      BC.SymbolToFunctionMap.erase(Label);
-      removeSymbolFromSecondaryEntryPointMap(Label);
+    // constant island inside the function, ignore this label.
+    if (isStartOfConstantIsland(Offset))
       continue;
-    }
 
     BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
                  "detected in function "
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index b2056ba2380fb..79daa38142ae8 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -881,14 +881,9 @@ void RewriteInstance::discoverFileObjects() {
   // code section (see IHI0056B). $d identifies data contents.
   // Compilers usually merge multiple data objects in a single $d-$x interval,
   // but we need every data object to be marked with $d. Because of that we
-  // create a vector of MarkerSyms with all locations of data objects.
+  // keep track of marker symbols with all locations of data objects.
 
-  struct MarkerSym {
-    uint64_t Address;
-    MarkerSymType Type;
-  };
-
-  std::vector<MarkerSym> SortedMarkerSymbols;
+  DenseMap<uint64_t, MarkerSymType> MarkerSymbols;
   auto addExtraDataMarkerPerSymbol = [&]() {
     bool IsData = false;
     uint64_t LastAddr = 0;
@@ -912,14 +907,14 @@ void RewriteInstance::discoverFileObjects() {
       }
 
       if (MarkerType != MarkerSymType::NONE) {
-        SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType});
+        MarkerSymbols[SymInfo.Address] = MarkerType;
         LastAddr = SymInfo.Address;
         IsData = MarkerType == MarkerSymType::DATA;
         continue;
       }
 
       if (IsData) {
-        SortedMarkerSymbols.push_back({SymInfo.Address, MarkerSymType::DATA});
+        MarkerSymbols[SymInfo.Address] = MarkerSymType::DATA;
         LastAddr = SymInfo.Address;
       }
     }
@@ -1284,27 +1279,24 @@ void RewriteInstance::discoverFileObjects() {
   BC->setHasSymbolsWithFileName(FileSymbols.size());
 
   // Now that all the functions were created - adjust their boundaries.
-  adjustFunctionBoundaries();
+  adjustFunctionBoundaries(MarkerSymbols);
 
   // Annotate functions with code/data markers in AArch64
-  for (auto ISym = SortedMarkerSymbols.begin();
-       ISym != SortedMarkerSymbols.end(); ++ISym) {
-
-    auto *BF =
-        BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
+  for (auto &[Address, Type] : MarkerSymbols) {
+    auto *BF = BC->getBinaryFunctionContainingAddress(Address, true, true);
 
     if (!BF) {
       // Stray marker
       continue;
     }
-    const auto EntryOffset = ISym->Address - BF->getAddress();
-    if (ISym->Type == MarkerSymType::CODE) {
+    const auto EntryOffset = Address - BF->getAddress();
+    if (Type == MarkerSymType::CODE) {
       BF->markCodeAtOffset(EntryOffset);
       continue;
     }
-    if (ISym->Type == MarkerSymType::DATA) {
+    if (Type == MarkerSymType::DATA) {
       BF->markDataAtOffset(EntryOffset);
-      BC->AddressToConstantIslandMap[ISym->Address] = BF;
+      BC->AddressToConstantIslandMap[Address] = BF;
       continue;
     }
     llvm_unreachable("Unknown marker");
@@ -1833,7 +1825,8 @@ void RewriteInstance::disassemblePLT() {
   }
 }
 
-void RewriteInstance::adjustFunctionBoundaries() {
+void RewriteInstance::adjustFunctionBoundaries(
+    DenseMap<uint64_t, MarkerSymType> &MarkerSyms) {
   for (auto BFI = BC->getBinaryFunctions().begin(),
             BFE = BC->getBinaryFunctions().end();
        BFI != BFE; ++BFI) {
@@ -1871,12 +1864,15 @@ void RewriteInstance::adjustFunctionBoundaries() {
         continue;
       }
 
-      // This is potentially another entry point into the function.
-      uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
-      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
-                        << Function << " at offset 0x"
-                        << Twine::utohexstr(EntryOffset) << '\n');
-      Function.addEntryPointAtOffset(EntryOffset);
+      auto It = MarkerSyms.find(NextSymRefI->first);
+      if (It == MarkerSyms.end() || It->second != MarkerSymType::DATA) {
+        // This is potentially another entry point into the function.
+        uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
+        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
+                          << Function << " at offset 0x"
+                          << Twine::utohexstr(EntryOffset) << '\n');
+        Function.addEntryPointAtOffset(EntryOffset);
+      }
 
       ++NextSymRefI;
     }
diff --git a/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
new file mode 100644
index 0000000000000..9c77ecefa4b96
--- /dev/null
+++ b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
@@ -0,0 +1,38 @@
+# This test is to ensure that discovered data marker symbol would invalidate
+# extra entry point that was added earlier.
+
+# RUN: %clang %cflags %s -o %t.so -Wl,-q -Wl,--init=_bar -Wl,--fini=_bar
+# RUN: llvm-bolt %t.so -o %t.instr.so
+
+  .text
+  .global _start
+  .type _start, %function
+_start:
+  ret
+
+  .text
+  .global _foo
+  .type _foo, %function
+_foo:
+  cbz x1, _foo_2
+_foo_1:
+  add x1, x2, x0
+  b _foo
+_foo_2:
+  ret
+
+# None of these constant island symbols should be identified as extra entry
+# point for function `_foo'.
+  .align 4
+_const1: .short 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80
+_const2: .short 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0
+_const3: .short 0x04, 0x08, 0x0c, 0x20, 0x60, 0x80, 0xa0, 0xc0
+
+  .text
+  .global _bar
+  .type _bar, %function
+_bar:
+  ret
+
+  # Dummy relocation to force relocation mode
+  .reloc 0, R_AARCH64_NONE



More information about the llvm-commits mailing list