[llvm] [BOLT][WIP] Always treat function entry as code (PR #160161)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 4 13:52:12 PDT 2025


https://github.com/maksfb updated https://github.com/llvm/llvm-project/pull/160161

>From e0f772525b3ee988896623fba90c005393e53af2 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Mon, 22 Sep 2025 11:01:37 -0700
Subject: [PATCH] [BOLT] Always treat function entry as code

If an address has both a data marker "$d" and a function symbol
associated with it, treat it as code.
---
 bolt/lib/Rewrite/RewriteInstance.cpp     | 12 +++++++-----
 bolt/test/AArch64/data-at-0-offset.c     | 17 -----------------
 bolt/test/AArch64/function-data-marker.s | 23 +++++++++++++++++++++++
 3 files changed, 30 insertions(+), 22 deletions(-)
 delete mode 100644 bolt/test/AArch64/data-at-0-offset.c
 create mode 100644 bolt/test/AArch64/function-data-marker.s

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index c13a9f016e8ae..02968eefa447b 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -917,9 +917,6 @@ void RewriteInstance::discoverFileObjects() {
     bool IsData = false;
     uint64_t LastAddr = 0;
     for (const auto &SymInfo : SortedSymbols) {
-      if (LastAddr == SymInfo.Address) // don't repeat markers
-        continue;
-
       MarkerSymType MarkerType = BC->getMarkerType(SymInfo.Symbol);
 
       // Treat ST_Function as code.
@@ -929,8 +926,13 @@ void RewriteInstance::discoverFileObjects() {
         if (IsData) {
           Expected<StringRef> NameOrError = SymInfo.Symbol.getName();
           consumeError(NameOrError.takeError());
-          BC->errs() << "BOLT-WARNING: function symbol " << *NameOrError
-                     << " lacks code marker\n";
+          if (LastAddr == SymInfo.Address) {
+            BC->errs() << "BOLT-WARNING: ignoring data marker conflicting with "
+                          "function symbol " << *NameOrError << '\n';
+          } else {
+            BC->errs() << "BOLT-WARNING: function symbol " << *NameOrError
+                       << " lacks code marker\n";
+          }
         }
         MarkerType = MarkerSymType::CODE;
       }
diff --git a/bolt/test/AArch64/data-at-0-offset.c b/bolt/test/AArch64/data-at-0-offset.c
deleted file mode 100644
index 01248a637d393..0000000000000
--- a/bolt/test/AArch64/data-at-0-offset.c
+++ /dev/null
@@ -1,17 +0,0 @@
-// RUN: %clang %cflags -O2 -fPIE -std=gnu99 -Wl,-q -pie  %s -o %t.exe
-// RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
-// CHECK-NOT: BOLT-WARNING: unable to disassemble instruction at offset
-
-void extra_space() {
-  asm volatile(".rept 256\n"
-               "    .byte 0xff\n"
-               ".endr\n");
-  return;
-}
-
-int main(int argc, char **argv) {
-  void (*fn)(void);
-  fn = extra_space + 256;
-  fn();
-  return 0;
-}
diff --git a/bolt/test/AArch64/function-data-marker.s b/bolt/test/AArch64/function-data-marker.s
new file mode 100644
index 0000000000000..71b79acf0fc7f
--- /dev/null
+++ b/bolt/test/AArch64/function-data-marker.s
@@ -0,0 +1,23 @@
+## Check that if a data marker is present at the start of a function, the
+## underlying bytes are still treated as code.
+
+# RUN: %clang %cflags %s -o %t.exe
+# RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg 2>&1 | FileCheck %s
+
+# CHECK: BOLT-WARNING: ignoring data marker conflicting with function symbol _start
+
+.text
+.balign 4
+
+## Data marker is emitted because ".long" directive is used instead of ".inst".
+.global _start
+.type _start, %function
+_start:
+  .long 0xcec08000 // sha512su0 v0.2d, v0.2d
+  ret
+.size _start, .-_start
+
+# CHECK-LABEL: Binary Function "_start"
+# CHECK: Entry Point
+# CHECK-NEXT: sha512su0 v0.2d, v0.2d
+



More information about the llvm-commits mailing list