[llvm] [BOLT] Check if symbol is in data area of function (PR #160143)

Asher Dobrescu via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 7 08:52:23 PDT 2025


https://github.com/Asher8118 updated https://github.com/llvm/llvm-project/pull/160143

>From 36823a0c80776a97b6579baeadbee0e762377720 Mon Sep 17 00:00:00 2001
From: Ash Dobrescu <ash.dobrescu at arm.com>
Date: Mon, 22 Sep 2025 16:14:39 +0000
Subject: [PATCH 1/4] [BOLT] Check if symbol is in data area of function

There are cases in which `getEntryIDForSymbol` is called,
where the given Symbol is in a constant island, and so BOLT
can not find its function. This causes BOLT to reach
`llvm_unreachable("symbol not found")` and crash. This patch adds
a check that avoid this crash and gives a warning to the user.
---
 bolt/lib/Core/BinaryContext.cpp       | 10 +++++-
 bolt/test/AArch64/check-symbol-area.s | 49 +++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/AArch64/check-symbol-area.s

diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 72c72bbaf4a65..5f3b3c0e57152 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2374,8 +2374,16 @@ BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
     return nullptr;
 
   BinaryFunction *BF = BFI->second;
-  if (EntryDesc)
+  if (EntryDesc) {
+    const uint64_t Address = BF->getAddress() + Symbol->getOffset();
+    if (BF->isInConstantIsland(Address)) {
+      this->outs() << "BOLT-WARNING: symbol " << Symbol->getName()
+                   << " is in data region of function 0x"
+                   << Twine::utohexstr(Address) << ".\n";
+      return nullptr;
+    }
     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
+  }
 
   return BF;
 }
diff --git a/bolt/test/AArch64/check-symbol-area.s b/bolt/test/AArch64/check-symbol-area.s
new file mode 100644
index 0000000000000..43ce25e16181f
--- /dev/null
+++ b/bolt/test/AArch64/check-symbol-area.s
@@ -0,0 +1,49 @@
+// This test checks that when looking for a function
+// correspnding to a symbol, BOLT is not looking 
+// through a data area (constant island).
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
+
+// Before adding a check for constant islands, BOLT would exit with an error
+// of the form: "symbol not found" and throw an LLVM UNREACHABLE error.
+# CHECK-NOT: symbol not found
+# CHECK-NOT: UNREACHABLE
+
+// Now BOLT throws a warning and does not crash.
+# CHECK: BOLT-WARNING: symbol [[SYM:.*]]  is in data region of function 0x{{.*}}.
+
+.text
+.global main
+main:
+        stp     x14, x15, [sp, -8]!
+        mov     x14, sp
+        adrp    x1, .test
+        add     x0, x1, :lo12:.test
+        bl      first_block
+        ret
+
+.global first_block
+$d:
+first_block:
+        stp     x14, x15, [sp, -8]!
+        mov     x14, sp
+        bl      second_block
+        ret
+second_block:
+        stp     x14, x15, [sp, -8]!
+        mov     x14, sp
+        bl      third_block
+        ret
+$x:
+third_block:
+        stp     x14, x15, [sp, -8]!
+        mov     x14, sp
+        adrp    x1, .data
+        add     x0, x1, :lo12:.test
+        ret
+
+.data
+.test:
+        .string "test"

>From 797b5b2cc3312f9a5519fd2fd5b777363cb9edbe Mon Sep 17 00:00:00 2001
From: Ash Dobrescu <ash.dobrescu at arm.com>
Date: Tue, 23 Sep 2025 10:41:09 +0000
Subject: [PATCH 2/4] Fix warning messgage and test comment

---
 bolt/lib/Core/BinaryContext.cpp       | 2 +-
 bolt/test/AArch64/check-symbol-area.s | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 5f3b3c0e57152..43df8b7967d78 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2379,7 +2379,7 @@ BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
     if (BF->isInConstantIsland(Address)) {
       this->outs() << "BOLT-WARNING: symbol " << Symbol->getName()
                    << " is in data region of function 0x"
-                   << Twine::utohexstr(Address) << ".\n";
+                   << Twine::utohexstr(BF->getAddress()) << ".\n";
       return nullptr;
     }
     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
diff --git a/bolt/test/AArch64/check-symbol-area.s b/bolt/test/AArch64/check-symbol-area.s
index 43ce25e16181f..0125dc2edafca 100644
--- a/bolt/test/AArch64/check-symbol-area.s
+++ b/bolt/test/AArch64/check-symbol-area.s
@@ -1,5 +1,5 @@
 // This test checks that when looking for a function
-// correspnding to a symbol, BOLT is not looking 
+// corresponding to a symbol, BOLT is not looking 
 // through a data area (constant island).
 
 # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o

>From 550efd1be70a7eb9ddc7f611b6f83a8cfabee529 Mon Sep 17 00:00:00 2001
From: Ash Dobrescu <ash.dobrescu at arm.com>
Date: Tue, 7 Oct 2025 15:42:26 +0000
Subject: [PATCH 3/4] Make getEntryIDForSymbol() return optional and address
 review comments

---
 bolt/include/bolt/Core/BinaryFunction.h |  3 ++-
 bolt/lib/Core/BinaryContext.cpp         |  7 +++----
 bolt/lib/Core/BinaryFunction.cpp        |  6 +++---
 bolt/test/AArch64/check-symbol-area.s   | 22 +++++-----------------
 4 files changed, 13 insertions(+), 25 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 51b139a15e1a0..aea418034709f 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -650,7 +650,8 @@ class BinaryFunction {
   ///
   /// Prefer to use BinaryContext::getFunctionForSymbol(EntrySymbol, &ID)
   /// instead of calling this function directly.
-  uint64_t getEntryIDForSymbol(const MCSymbol *EntrySymbol) const;
+  std::optional<uint64_t>
+  getEntryIDForSymbol(const MCSymbol *EntrySymbol) const;
 
   /// If the function represents a secondary split function fragment, set its
   /// parent fragment to \p BF.
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 43df8b7967d78..9630f7d4a8872 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2378,11 +2378,10 @@ BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
     const uint64_t Address = BF->getAddress() + Symbol->getOffset();
     if (BF->isInConstantIsland(Address)) {
       this->outs() << "BOLT-WARNING: symbol " << Symbol->getName()
-                   << " is in data region of function 0x"
-                   << Twine::utohexstr(BF->getAddress()) << ".\n";
-      return nullptr;
+                   << " is in data region of function " << BF->getOneName()
+                   << "(0x" << Twine::utohexstr(BF->getAddress()) << ").\n";
     }
-    *EntryDesc = BF->getEntryIDForSymbol(Symbol);
+    *EntryDesc = BF->getEntryIDForSymbol(Symbol).value_or(0);
   }
 
   return BF;
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 578a87dc6c09d..07d0d23dfc9f0 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -3855,7 +3855,8 @@ MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
   return nullptr;
 }
 
-uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
+std::optional<uint64_t>
+BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
   if (!isMultiEntry())
     return 0;
 
@@ -3882,8 +3883,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
       return NumEntries;
     ++NumEntries;
   }
-
-  llvm_unreachable("symbol not found");
+  return std::nullopt;
 }
 
 bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const {
diff --git a/bolt/test/AArch64/check-symbol-area.s b/bolt/test/AArch64/check-symbol-area.s
index 0125dc2edafca..ceb5bc562a744 100644
--- a/bolt/test/AArch64/check-symbol-area.s
+++ b/bolt/test/AArch64/check-symbol-area.s
@@ -12,38 +12,26 @@
 # CHECK-NOT: UNREACHABLE
 
 // Now BOLT throws a warning and does not crash.
-# CHECK: BOLT-WARNING: symbol [[SYM:.*]]  is in data region of function 0x{{.*}}.
+# CHECK: BOLT-WARNING: symbol third_block/1  is in data region of function first_block(0x{{[0-9a-f]+}}).
 
 .text
 .global main
 main:
-        stp     x14, x15, [sp, -8]!
-        mov     x14, sp
-        adrp    x1, .test
-        add     x0, x1, :lo12:.test
+        add     x0, x1, x1
         bl      first_block
         ret
 
 .global first_block
 $d:
 first_block:
-        stp     x14, x15, [sp, -8]!
-        mov     x14, sp
+        add     x0, x1, x1
         bl      second_block
         ret
 second_block:
-        stp     x14, x15, [sp, -8]!
-        mov     x14, sp
+        add     x0, x1, x1
         bl      third_block
         ret
 $x:
 third_block:
-        stp     x14, x15, [sp, -8]!
-        mov     x14, sp
-        adrp    x1, .data
-        add     x0, x1, :lo12:.test
+        add     x0, x1, x1
         ret
-
-.data
-.test:
-        .string "test"

>From 3e9e9c7b4c402a266991037074dc197febf58dd3 Mon Sep 17 00:00:00 2001
From: Ash Dobrescu <ash.dobrescu at arm.com>
Date: Tue, 7 Oct 2025 15:50:39 +0000
Subject: [PATCH 4/4] Expand test comment to 80 char

---
 bolt/test/AArch64/check-symbol-area.s | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/bolt/test/AArch64/check-symbol-area.s b/bolt/test/AArch64/check-symbol-area.s
index ceb5bc562a744..abab547dd7558 100644
--- a/bolt/test/AArch64/check-symbol-area.s
+++ b/bolt/test/AArch64/check-symbol-area.s
@@ -1,6 +1,5 @@
-// This test checks that when looking for a function
-// corresponding to a symbol, BOLT is not looking 
-// through a data area (constant island).
+// This test checks that when looking for a function corresponding to a
+// symbol, BOLT is not looking through a data area (constant island).
 
 # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
 # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q



More information about the llvm-commits mailing list