[llvm] [BOLT] Retain certain local symbols (PR #184074)

YongKang Zhu via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 2 22:29:06 PST 2026


https://github.com/yozhu updated https://github.com/llvm/llvm-project/pull/184074

>From ebc6e2751392a857ade18d3280c8f7b7b8d4dac9 Mon Sep 17 00:00:00 2001
From: YongKang Zhu <yongzhu at fb.com>
Date: Wed, 18 Feb 2026 22:40:49 -0800
Subject: [PATCH 1/3] [BOLT] Retain certain local symbols

BOLT currently strips all STT_NOTYPE STB_LOCAL zero-sized symbols
that fall inside function bodies. Certain such symbols are named
labels (loop markers and subroutine entry points) or local function
symbols in hand-written assembly. We now keep them in local symbol
table in BOLT processed binaries for better symbolication.
---
 bolt/lib/Rewrite/RewriteInstance.cpp          | 43 +++++++++++++------
 .../AArch64/compare-and-branch-inversion.S    |  8 ++++
 .../compare-and-branch-reorder-blocks.S       |  2 +
 bolt/test/AArch64/retain-local-symbols.s      | 30 +++++++++++++
 bolt/test/X86/avx512-trap.test                |  3 +-
 bolt/test/X86/dynamic-relocs-on-entry.s       |  5 ++-
 6 files changed, 74 insertions(+), 17 deletions(-)
 create mode 100644 bolt/test/AArch64/retain-local-symbols.s

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index e9bb5d81c80f9..f8bd956c3a8fe 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -5352,13 +5352,27 @@ void RewriteInstance::updateELFSymbolTable(
     } else {
       // Check if the function symbol matches address inside a function, i.e.
       // it marks a secondary entry point.
+      // Also look up local NOTYPE symbols inside functions so we can
+      // update their addresses to reflect the output layout.
+      // Skip AArch64 marker symbols ($d, $x) inside functions —
+      // BOLT generates its own via addExtraSymbols.
+      auto IsAArch64MarkerSymbol = [&]() {
+        return BC->isAArch64() &&
+               (SymbolName->starts_with("$d") || SymbolName->starts_with("$x"));
+      };
+      const bool IsLocalLabel = Symbol.getType() == ELF::STT_NOTYPE &&
+                                Symbol.getBinding() == ELF::STB_LOCAL &&
+                                Symbol.st_size == 0 && !IsAArch64MarkerSymbol();
       Function =
-          (Symbol.getType() == ELF::STT_FUNC)
+          (Symbol.getType() == ELF::STT_FUNC || IsLocalLabel)
               ? BC->getBinaryFunctionContainingAddress(Symbol.st_value,
                                                        /*CheckPastEnd=*/false,
                                                        /*UseMaxSize=*/true)
               : nullptr;
 
+      assert((!Function || !IsLocalLabel || !Function->isFolded()) &&
+             "Local label inside ICF-folded function");
+
       if (Function && Function->isEmitted()) {
         assert(Function->getLayout().isHotColdSplit() &&
                "Adding symbols based on cold fragment when there are more than "
@@ -5366,6 +5380,12 @@ void RewriteInstance::updateELFSymbolTable(
         const uint64_t OutputAddress =
             Function->translateInputToOutputAddress(Symbol.st_value);
 
+        // Remove symbols that cannot be mapped to the output, e.g.
+        // data-in-code labels (jump tables) whose addresses BOLT
+        // does not track.
+        if (!OutputAddress)
+          continue;
+
         NewSymbol.st_value = OutputAddress;
         // Force secondary entry points to have zero size.
         NewSymbol.st_size = 0;
@@ -5414,19 +5434,14 @@ void RewriteInstance::updateELFSymbolTable(
             NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
         }
 
-        // Detect local syms in the text section that we didn't update
-        // and that were preserved by the linker to support relocations against
-        // .text. Remove them from the symtab.
-        if (Symbol.getType() == ELF::STT_NOTYPE &&
-            Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) {
-          if (BC->getBinaryFunctionContainingAddress(Symbol.st_value,
-                                                     /*CheckPastEnd=*/false,
-                                                     /*UseMaxSize=*/true)) {
-            // Can only delete the symbol if not patching. Such symbols should
-            // not exist in the dynamic symbol table.
-            assert(!IsDynSym && "cannot delete symbol");
-            continue;
-          }
+        // Drop AArch64 marker symbols ($d, $x) inside functions —
+        // BOLT generates its own via addExtraSymbols.
+        if (IsAArch64MarkerSymbol() &&
+            BC->getBinaryFunctionContainingAddress(Symbol.st_value,
+                                                   /*CheckPastEnd=*/false,
+                                                   /*UseMaxSize=*/true)) {
+          assert(!IsDynSym && "cannot delete symbol");
+          continue;
         }
       }
     }
diff --git a/bolt/test/AArch64/compare-and-branch-inversion.S b/bolt/test/AArch64/compare-and-branch-inversion.S
index f5903b2b5d25d..479775a2bbd64 100644
--- a/bolt/test/AArch64/compare-and-branch-inversion.S
+++ b/bolt/test/AArch64/compare-and-branch-inversion.S
@@ -30,8 +30,10 @@ immediate_increment:
 
 # CHECK: <immediate_increment>:
 # CHECK-NEXT:            {{.*}} cblt x0, #0x1, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}>
+# CHECK:      <.exit0>:
 # CHECK-NEXT:            {{.*}} mov  x0, #0x2 // =2
 # CHECK-NEXT:            {{.*}} ret
+# CHECK:      <.cold0>:
 # CHECK-NEXT: [[ADDR0]]: {{.*}} mov x0, #0x1 // =1
 # CHECK-NEXT:            {{.*}} ret
 
@@ -52,8 +54,10 @@ immediate_decrement:
 
 # CHECK: <immediate_decrement>:
 # CHECK-NEXT:            {{.*}} cbhi x0, #0x0, 0x[[ADDR1:[0-9a-f]+]] <{{.*}}>
+# CHECK:      <.exit1>:
 # CHECK-NEXT:            {{.*}} mov  x0, #0x2 // =2
 # CHECK-NEXT:            {{.*}} ret
+# CHECK:      <.cold1>:
 # CHECK-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1
 # CHECK-NEXT:            {{.*}} ret
 
@@ -74,8 +78,10 @@ register_swap:
 
 # CHECK: <register_swap>:
 # CHECK-NEXT:            {{.*}} cbgt x1, x0, 0x[[ADDR2:[0-9a-f]+]] <{{.*}}>
+# CHECK:      <.exit2>:
 # CHECK-NEXT:            {{.*}} mov  x0, #0x2 // =2
 # CHECK-NEXT:            {{.*}} ret
+# CHECK:      <.cold2>:
 # CHECK-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
 # CHECK-NEXT:            {{.*}} ret
 
@@ -99,8 +105,10 @@ irreversible:
 # CHECK: <irreversible>:
 # CHECK-NEXT:            {{.*}} cbgt x0, #0x3f, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
 # CHECK-NEXT:            {{.*}} b               0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
+# CHECK:      <.exit3>:
 # CHECK-NEXT: [[ADDR3]]: {{.*}} mov  x0, #0x2 // =2
 # CHECK-NEXT:            {{.*}} ret
+# CHECK:      <.cold3>:
 # CHECK-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x1 // =1
 # CHECK-NEXT:            {{.*}} ret
 
diff --git a/bolt/test/AArch64/compare-and-branch-reorder-blocks.S b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
index 464243d788c1f..0a5e75e9a4375 100644
--- a/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
+++ b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S
@@ -41,8 +41,10 @@ reorder_blocks:
 
 # CHECK: <reorder_blocks>:
 # CHECK-NEXT:           {{.*}} cbgt x0, #0x0, 0x[[ADDR:[0-9a-f]+]] <{{.*}}>
+# CHECK:      <.hot_exit>:
 # CHECK-NEXT:           {{.*}} mov  x0, #0x2 // =2
 # CHECK-NEXT:           {{.*}} ret
+# CHECK:      <.cold_exit>:
 # CHECK-NEXT: [[ADDR]]: {{.*}} mov  x0, #0x1 // =1
 # CHECK-NEXT:           {{.*}} ret
 
diff --git a/bolt/test/AArch64/retain-local-symbols.s b/bolt/test/AArch64/retain-local-symbols.s
new file mode 100644
index 0000000000000..b7c8e48d9e203
--- /dev/null
+++ b/bolt/test/AArch64/retain-local-symbols.s
@@ -0,0 +1,30 @@
+## Check that BOLT retains named local symbols (assembly labels) inside
+## functions and updates their addresses to reflect the output layout.
+
+# RUN: %clang %cflags %s -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt -lite=false
+# RUN: llvm-nm -n %t.bolt | FileCheck %s
+
+# CHECK: T _start
+# CHECK: t loop_start
+# CHECK: t loop_end
+# CHECK: t helper
+
+  .text
+  .global _start
+  .type _start, %function
+_start:
+  mov x0, #10
+  bl helper
+loop_start:
+  sub x0, x0, #1
+  cbnz x0, loop_start
+loop_end:
+  ret
+
+helper:
+  add x0, x0, #1
+  ret
+
+## Force relocation mode.
+  .reloc 0, R_AARCH64_NONE
diff --git a/bolt/test/X86/avx512-trap.test b/bolt/test/X86/avx512-trap.test
index 93b02f4397cc8..8c98fa3016d57 100644
--- a/bolt/test/X86/avx512-trap.test
+++ b/bolt/test/X86/avx512-trap.test
@@ -8,7 +8,7 @@ RUN: llvm-objdump -d --disassemble-symbols=use_avx512 %t | \
 RUN:   FileCheck %s --check-prefix=CHECK-DIS-NO-TRAP
 
 RUN: llvm-bolt %t --trap-avx512=1 -o %t.bolt --lite=0 2>&1 | FileCheck %s
-RUN: llvm-objdump -d --disassemble-symbols=use_avx512 %t.bolt | \
+RUN: llvm-objdump -d --disassemble-symbols=use_avx512,secondary_entry %t.bolt | \
 RUN:   FileCheck %s --check-prefix=CHECK-DIS
 
 RUN: llvm-bolt %t --trap-avx512=0 -o %t.bolt --lite=0
@@ -20,6 +20,7 @@ CHECK: BOLT-WARNING: 1 function will trap on entry
 ## Check that we have two ud2 instructions - one per entry.
 CHECK-DIS:      use_avx512
 CHECK-DIS-NEXT:    ud2
+CHECK-DIS:      <secondary_entry>:
 CHECK-DIS-NEXT:    ud2
 
 ## Check that we generate correct AVX-512
diff --git a/bolt/test/X86/dynamic-relocs-on-entry.s b/bolt/test/X86/dynamic-relocs-on-entry.s
index 4ec8ba4ad4460..477aece70f19e 100644
--- a/bolt/test/X86/dynamic-relocs-on-entry.s
+++ b/bolt/test/X86/dynamic-relocs-on-entry.s
@@ -5,13 +5,14 @@
 # RUN: %clang %cflags -fPIC -pie %s -o %t.exe -nostdlib -Wl,-q
 # RUN: llvm-bolt %t.exe -o %t.bolt > %t.out.txt
 # RUN: llvm-readelf -r %t.bolt >> %t.out.txt
-# RUN: llvm-objdump --disassemble-symbols=chain %t.bolt >> %t.out.txt
+# RUN: llvm-objdump --disassemble-symbols=chain,Label %t.bolt >> %t.out.txt
 # RUN: FileCheck %s --input-file=%t.out.txt
 
 ## Check if the new address in `chain` is correctly updated by BOLT
 # CHECK: Relocation section '.rela.dyn' at offset 0x{{.*}} contains 1 entries:
 # CHECK: {{.*}} R_X86_64_RELATIVE [[#%x,ADDR:]]
-# CHECK: [[#ADDR]]: c3 retq
+# CHECK: <Label>:
+# CHECK-NEXT: [[#ADDR]]: c3 retq
 	.text
 	.type   chain, @function
 chain:

>From 6443486e631fbbff8c4cf2efd1e98bae6af3fb8f Mon Sep 17 00:00:00 2001
From: YongKang Zhu <yongzhu at fb.com>
Date: Mon, 2 Mar 2026 10:34:51 -0800
Subject: [PATCH 2/3] Fix RISCV test failures

---
 bolt/test/RISCV/mapping-syms-isa.test | 3 +--
 bolt/test/RISCV/mapping-syms.s        | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/bolt/test/RISCV/mapping-syms-isa.test b/bolt/test/RISCV/mapping-syms-isa.test
index 22678af12f913..f9693058f00d1 100644
--- a/bolt/test/RISCV/mapping-syms-isa.test
+++ b/bolt/test/RISCV/mapping-syms-isa.test
@@ -13,6 +13,5 @@ CHECK: 00000002: ret
 # Check .word is still present in output
 CHECK-OBJDUMP: <_start>:
 CHECK-OBJDUMP-NEXT: nop
-CHECK-OBJDUMP-NEXT: unimp
-CHECK-OBJDUMP-NEXT: unimp
+CHECK-OBJDUMP-NEXT: .word 0x00000000
 CHECK-OBJDUMP-NEXT: ret
diff --git a/bolt/test/RISCV/mapping-syms.s b/bolt/test/RISCV/mapping-syms.s
index e8fdeb0c7572d..a29edaa80056e 100644
--- a/bolt/test/RISCV/mapping-syms.s
+++ b/bolt/test/RISCV/mapping-syms.s
@@ -15,8 +15,7 @@
 /// Check .word is still present in output
 // CHECK-OBJDUMP: <_start>:
 // CHECK-OBJDUMP-NEXT: nop
-// CHECK-OBJDUMP-NEXT: unimp
-// CHECK-OBJDUMP-NEXT: unimp
+// CHECK-OBJDUMP-NEXT: .word 0x00000000
 // CHECK-OBJDUMP-NEXT: ret
     .text
     .globl _start

>From 7ba89fa6e9e85ae6b319e9a6cb2a8d0d657263ad Mon Sep 17 00:00:00 2001
From: YongKang Zhu <yongzhu at fb.com>
Date: Mon, 2 Mar 2026 21:57:41 -0800
Subject: [PATCH 3/3] Use the existing marker check logic in BinaryContext, so
 to consider RISC-V marker symbols as well. BOLT currently targets X86,
 AArch64 and RISC-V only, and X86 doesn't have marker symbol.

---
 bolt/include/bolt/Core/BinaryContext.h |  4 +++-
 bolt/lib/Core/BinaryContext.cpp        | 29 +++++++++++++++-----------
 bolt/lib/Rewrite/RewriteInstance.cpp   | 14 ++++++-------
 bolt/test/RISCV/mapping-syms-isa.test  |  3 ++-
 bolt/test/RISCV/mapping-syms.s         |  3 ++-
 5 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 8741b3c077bdd..f8a0be0418433 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -887,9 +887,11 @@ class BinaryContext {
 
   bool isRISCV() const { return TheTriple->getArch() == llvm::Triple::riscv64; }
 
-  // AArch64-specific functions to check if symbol is used to delimit
+  // AArch64/RISC-V functions to check if symbol is used to delimit
   // code/data in .text. Code is marked by $x, data by $d.
   MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
+  MarkerSymType getMarkerType(unsigned SymbolType, uint64_t SymbolSize,
+                              StringRef SymbolName) const;
   bool isMarker(const SymbolRef &Symbol) const;
 
   /// Iterate over all BinaryData.
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index f0070b47ee52f..1f1e360c3c9f4 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2039,35 +2039,40 @@ void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
   }
 }
 
-MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
+MarkerSymType BinaryContext::getMarkerType(unsigned SymbolType,
+                                           uint64_t SymbolSize,
+                                           StringRef SymbolName) const {
   // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
   // in the code section (see IHI0056B). $x identifies a symbol starting code or
   // the end of a data chunk inside code, $d identifies start of data.
-  if (isX86() || ELFSymbolRef(Symbol).getSize())
-    return MarkerSymType::NONE;
-
-  Expected<StringRef> NameOrError = Symbol.getName();
-  Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
-
-  if (!TypeOrError || !NameOrError)
+  if (isX86() || SymbolSize)
     return MarkerSymType::NONE;
 
-  if (*TypeOrError != SymbolRef::ST_Unknown)
+  if (SymbolType != ELF::STT_NOTYPE)
     return MarkerSymType::NONE;
 
-  if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
+  if (SymbolName == "$x" || SymbolName.starts_with("$x."))
     return MarkerSymType::CODE;
 
   // $x<ISA>
-  if (isRISCV() && NameOrError->starts_with("$x"))
+  if (isRISCV() && SymbolName.starts_with("$x"))
     return MarkerSymType::CODE;
 
-  if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
+  if (SymbolName == "$d" || SymbolName.starts_with("$d."))
     return MarkerSymType::DATA;
 
   return MarkerSymType::NONE;
 }
 
+MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
+  Expected<StringRef> NameOrError = Symbol.getName();
+  if (!NameOrError)
+    return MarkerSymType::NONE;
+
+  return getMarkerType(ELFSymbolRef(Symbol).getELFType(),
+                       ELFSymbolRef(Symbol).getSize(), *NameOrError);
+}
+
 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
   return getMarkerType(Symbol) != MarkerSymType::NONE;
 }
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index f8bd956c3a8fe..16c9012b1fee6 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -5354,15 +5354,15 @@ void RewriteInstance::updateELFSymbolTable(
       // it marks a secondary entry point.
       // Also look up local NOTYPE symbols inside functions so we can
       // update their addresses to reflect the output layout.
-      // Skip AArch64 marker symbols ($d, $x) inside functions —
+      // Skip AArch64/RISC-V marker symbols ($d, $x) inside functions —
       // BOLT generates its own via addExtraSymbols.
-      auto IsAArch64MarkerSymbol = [&]() {
-        return BC->isAArch64() &&
-               (SymbolName->starts_with("$d") || SymbolName->starts_with("$x"));
+      auto IsMarkerSymbol = [&]() {
+        return BC->getMarkerType(Symbol.getType(), Symbol.st_size,
+                                 *SymbolName) != MarkerSymType::NONE;
       };
       const bool IsLocalLabel = Symbol.getType() == ELF::STT_NOTYPE &&
                                 Symbol.getBinding() == ELF::STB_LOCAL &&
-                                Symbol.st_size == 0 && !IsAArch64MarkerSymbol();
+                                Symbol.st_size == 0 && !IsMarkerSymbol();
       Function =
           (Symbol.getType() == ELF::STT_FUNC || IsLocalLabel)
               ? BC->getBinaryFunctionContainingAddress(Symbol.st_value,
@@ -5434,9 +5434,9 @@ void RewriteInstance::updateELFSymbolTable(
             NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
         }
 
-        // Drop AArch64 marker symbols ($d, $x) inside functions —
+        // Drop AArch64/RISC-V marker symbols ($d, $x) inside functions —
         // BOLT generates its own via addExtraSymbols.
-        if (IsAArch64MarkerSymbol() &&
+        if (IsMarkerSymbol() &&
             BC->getBinaryFunctionContainingAddress(Symbol.st_value,
                                                    /*CheckPastEnd=*/false,
                                                    /*UseMaxSize=*/true)) {
diff --git a/bolt/test/RISCV/mapping-syms-isa.test b/bolt/test/RISCV/mapping-syms-isa.test
index f9693058f00d1..22678af12f913 100644
--- a/bolt/test/RISCV/mapping-syms-isa.test
+++ b/bolt/test/RISCV/mapping-syms-isa.test
@@ -13,5 +13,6 @@ CHECK: 00000002: ret
 # Check .word is still present in output
 CHECK-OBJDUMP: <_start>:
 CHECK-OBJDUMP-NEXT: nop
-CHECK-OBJDUMP-NEXT: .word 0x00000000
+CHECK-OBJDUMP-NEXT: unimp
+CHECK-OBJDUMP-NEXT: unimp
 CHECK-OBJDUMP-NEXT: ret
diff --git a/bolt/test/RISCV/mapping-syms.s b/bolt/test/RISCV/mapping-syms.s
index a29edaa80056e..e8fdeb0c7572d 100644
--- a/bolt/test/RISCV/mapping-syms.s
+++ b/bolt/test/RISCV/mapping-syms.s
@@ -15,7 +15,8 @@
 /// Check .word is still present in output
 // CHECK-OBJDUMP: <_start>:
 // CHECK-OBJDUMP-NEXT: nop
-// CHECK-OBJDUMP-NEXT: .word 0x00000000
+// CHECK-OBJDUMP-NEXT: unimp
+// CHECK-OBJDUMP-NEXT: unimp
 // CHECK-OBJDUMP-NEXT: ret
     .text
     .globl _start



More information about the llvm-commits mailing list