[llvm] [BOLT] Validate extra entry point by querying data marker symbols (PR #154611)
YongKang Zhu via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 13:46:23 PDT 2025
https://github.com/yozhu created https://github.com/llvm/llvm-project/pull/154611
Look up marker symbols and decide whether candidate is really extra entry
point in `adjustFunctionBoundaries()`.
>From db4cbdbead173a863bffc142e79a1c7c492c2dd2 Mon Sep 17 00:00:00 2001
From: YongKang Zhu <yongzhu at fb.com>
Date: Mon, 18 Aug 2025 14:33:27 -0700
Subject: [PATCH] [BOLT] Validate extra entry point candidate by querying data
marker symbols
Look up marker symbols and decide whether candidate is really extra entry
point in `adjustFunctionBoundaries()`.
---
bolt/include/bolt/Core/BinaryFunction.h | 5 --
bolt/include/bolt/Rewrite/RewriteInstance.h | 2 +-
bolt/lib/Core/BinaryFunction.cpp | 8 +---
bolt/lib/Rewrite/RewriteInstance.cpp | 48 +++++++++----------
...data-marker-invalidates-extra-entrypoint.s | 38 +++++++++++++++
5 files changed, 63 insertions(+), 38 deletions(-)
create mode 100644 bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index ae580520b9110..b59926cc75571 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -1196,11 +1196,6 @@ class BinaryFunction {
return getSecondaryEntryPointSymbol(BB.getLabel());
}
- /// Remove a label from the secondary entry point map.
- void removeSymbolFromSecondaryEntryPointMap(const MCSymbol *Label) {
- SecondaryEntryPoints.erase(Label);
- }
-
/// Return true if the basic block is an entry point into the function
/// (either primary or secondary).
bool isEntryPoint(const BinaryBasicBlock &BB) const {
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 91d62a78de390..19dcce8205ebc 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -241,7 +241,7 @@ class RewriteInstance {
/// Adjust function sizes and set proper maximum size values after the whole
/// symbol table has been processed.
- void adjustFunctionBoundaries();
+ void adjustFunctionBoundaries(DenseMap<uint64_t, MarkerSymType> &MarkerSyms);
/// Make .eh_frame section relocatable.
void relocateEHFrameSection();
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index eec68ff5a5fce..8f494f105fbba 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1915,13 +1915,9 @@ void BinaryFunction::postProcessEntryPoints() {
continue;
// If we have grabbed a wrong code label which actually points to some
- // constant island inside the function, ignore this label and remove it
- // from the secondary entry point map.
- if (isStartOfConstantIsland(Offset)) {
- BC.SymbolToFunctionMap.erase(Label);
- removeSymbolFromSecondaryEntryPointMap(Label);
+ // constant island inside the function, ignore this label.
+ if (isStartOfConstantIsland(Offset))
continue;
- }
BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
"detected in function "
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index b2056ba2380fb..79daa38142ae8 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -881,14 +881,9 @@ void RewriteInstance::discoverFileObjects() {
// code section (see IHI0056B). $d identifies data contents.
// Compilers usually merge multiple data objects in a single $d-$x interval,
// but we need every data object to be marked with $d. Because of that we
- // create a vector of MarkerSyms with all locations of data objects.
+ // keep track of marker symbols with all locations of data objects.
- struct MarkerSym {
- uint64_t Address;
- MarkerSymType Type;
- };
-
- std::vector<MarkerSym> SortedMarkerSymbols;
+ DenseMap<uint64_t, MarkerSymType> MarkerSymbols;
auto addExtraDataMarkerPerSymbol = [&]() {
bool IsData = false;
uint64_t LastAddr = 0;
@@ -912,14 +907,14 @@ void RewriteInstance::discoverFileObjects() {
}
if (MarkerType != MarkerSymType::NONE) {
- SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType});
+ MarkerSymbols[SymInfo.Address] = MarkerType;
LastAddr = SymInfo.Address;
IsData = MarkerType == MarkerSymType::DATA;
continue;
}
if (IsData) {
- SortedMarkerSymbols.push_back({SymInfo.Address, MarkerSymType::DATA});
+ MarkerSymbols[SymInfo.Address] = MarkerSymType::DATA;
LastAddr = SymInfo.Address;
}
}
@@ -1284,27 +1279,24 @@ void RewriteInstance::discoverFileObjects() {
BC->setHasSymbolsWithFileName(FileSymbols.size());
// Now that all the functions were created - adjust their boundaries.
- adjustFunctionBoundaries();
+ adjustFunctionBoundaries(MarkerSymbols);
// Annotate functions with code/data markers in AArch64
- for (auto ISym = SortedMarkerSymbols.begin();
- ISym != SortedMarkerSymbols.end(); ++ISym) {
-
- auto *BF =
- BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
+ for (auto &[Address, Type] : MarkerSymbols) {
+ auto *BF = BC->getBinaryFunctionContainingAddress(Address, true, true);
if (!BF) {
// Stray marker
continue;
}
- const auto EntryOffset = ISym->Address - BF->getAddress();
- if (ISym->Type == MarkerSymType::CODE) {
+ const auto EntryOffset = Address - BF->getAddress();
+ if (Type == MarkerSymType::CODE) {
BF->markCodeAtOffset(EntryOffset);
continue;
}
- if (ISym->Type == MarkerSymType::DATA) {
+ if (Type == MarkerSymType::DATA) {
BF->markDataAtOffset(EntryOffset);
- BC->AddressToConstantIslandMap[ISym->Address] = BF;
+ BC->AddressToConstantIslandMap[Address] = BF;
continue;
}
llvm_unreachable("Unknown marker");
@@ -1833,7 +1825,8 @@ void RewriteInstance::disassemblePLT() {
}
}
-void RewriteInstance::adjustFunctionBoundaries() {
+void RewriteInstance::adjustFunctionBoundaries(
+ DenseMap<uint64_t, MarkerSymType> &MarkerSyms) {
for (auto BFI = BC->getBinaryFunctions().begin(),
BFE = BC->getBinaryFunctions().end();
BFI != BFE; ++BFI) {
@@ -1871,12 +1864,15 @@ void RewriteInstance::adjustFunctionBoundaries() {
continue;
}
- // This is potentially another entry point into the function.
- uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
- LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
- << Function << " at offset 0x"
- << Twine::utohexstr(EntryOffset) << '\n');
- Function.addEntryPointAtOffset(EntryOffset);
+ auto It = MarkerSyms.find(NextSymRefI->first);
+ if (It == MarkerSyms.end() || It->second != MarkerSymType::DATA) {
+ // This is potentially another entry point into the function.
+ uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
+ << Function << " at offset 0x"
+ << Twine::utohexstr(EntryOffset) << '\n');
+ Function.addEntryPointAtOffset(EntryOffset);
+ }
++NextSymRefI;
}
diff --git a/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
new file mode 100644
index 0000000000000..9c77ecefa4b96
--- /dev/null
+++ b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
@@ -0,0 +1,38 @@
+# This test is to ensure that discovered data marker symbol would invalidate
+# extra entry point that was added earlier.
+
+# RUN: %clang %cflags %s -o %t.so -Wl,-q -Wl,--init=_bar -Wl,--fini=_bar
+# RUN: llvm-bolt %t.so -o %t.instr.so
+
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ ret
+
+ .text
+ .global _foo
+ .type _foo, %function
+_foo:
+ cbz x1, _foo_2
+_foo_1:
+ add x1, x2, x0
+ b _foo
+_foo_2:
+ ret
+
+# None of these constant island symbols should be identified as extra entry
+# point for function `_foo'.
+ .align 4
+_const1: .short 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80
+_const2: .short 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0
+_const3: .short 0x04, 0x08, 0x0c, 0x20, 0x60, 0x80, 0xa0, 0xc0
+
+ .text
+ .global _bar
+ .type _bar, %function
+_bar:
+ ret
+
+ # Dummy relocation to force relocation mode
+ .reloc 0, R_AARCH64_NONE
More information about the llvm-commits
mailing list