[llvm-branch-commits] [llvm] [BOLT] Use identify_magic for shared library detection (PR #190902)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Apr 7 20:52:58 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

Replace the fragile filename-suffix check (`ends_with(".so")`) with a check of
the file's contents: call `identify_magic()` and compare the result against
`file_magic::elf_shared_object`. This reliably detects shared libraries when
filtering pre-aggregated profile data by build ID.

Test Plan: pre-aggregated-perf-shlib.test


---
Full diff: https://github.com/llvm/llvm-project/pull/190902.diff


2 Files Affected:

- (modified) bolt/lib/Profile/DataAggregator.cpp (+5-1) 
- (added) bolt/test/X86/pre-aggregated-perf-shlib.test (+70) 


``````````diff
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 38a975e449a16..f39cdda086a50 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -22,6 +22,7 @@
 #include "bolt/Utils/Utils.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
@@ -394,7 +395,10 @@ void DataAggregator::parsePreAggregated() {
   Line = 1;
 
   // When processing a shared object, filter pre-aggregated entries by buildid.
-  if (BC && !BC->HasFixedLoadAddress && BC->getFilename().ends_with(".so")) {
+  file_magic Magic;
+  if (BC && !BC->HasFixedLoadAddress &&
+      !identify_magic(BC->getFilename(), Magic) &&
+      Magic == file_magic::elf_shared_object) {
     if (auto FileBID = BC->getFileBuildID()) {
       FilterBuildID = *FileBID;
       outs() << "PERF2BOLT: filtering pre-aggregated data for buildid "
diff --git a/bolt/test/X86/pre-aggregated-perf-shlib.test b/bolt/test/X86/pre-aggregated-perf-shlib.test
new file mode 100644
index 0000000000000..bd5c0cf8c2851
--- /dev/null
+++ b/bolt/test/X86/pre-aggregated-perf-shlib.test
@@ -0,0 +1,70 @@
+## Test that perf2bolt correctly detects shared objects using identify_magic
+## and activates build ID filtering for pre-aggregated profiles.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -shared -Wl,-q -Wl,--build-id=0xaabb1122 -nostdlib
+# RUN: link_fdata %s %t.so %t.pa PREAGG
+# RUN: link_fdata %s %t.so %t.pa-bid PREAGG-BID
+
+## Test 1: Without buildid prefix, addresses don't match the .so's buildid
+## and are treated as external → 0 functions with profile.
+# RUN: perf2bolt %t.so -o %t.fdata --pa -p %t.pa \
+# RUN:   --profile-format=fdata 2>&1 | FileCheck %s --check-prefix=CHECK-NEG
+
+# CHECK-NEG: PERF2BOLT: filtering pre-aggregated data for buildid aabb1122
+# CHECK-NEG: BOLT-INFO: 0 out of {{[0-9]+}} functions in the binary (0.0%) have non-empty execution profile
+
+## Test 2: With matching buildid prefix, addresses are recognized.
+# RUN: perf2bolt %t.so -o %t.fdata2 --pa -p %t.pa-bid \
+# RUN:   --profile-format=fdata 2>&1 | FileCheck %s --check-prefix=CHECK-POS
+
+# CHECK-POS: PERF2BOLT: filtering pre-aggregated data for buildid aabb1122
+# CHECK-POS: BOLT-INFO: 1 out of {{[0-9]+}} functions in the binary
+# CHECK-POS-NOT: (0.0%)
+
+  .text
+  .globl foo
+  .type foo, @function
+foo:
+  pushq %rbp
+  movq  %rsp, %rbp
+  cmpl  $0, %edi
+  je    Lfalse
+Ltrue:
+  movl  $1, %eax
+  jmp   Lret
+Lfalse:
+  xorl  %eax, %eax
+Lret:
+  popq  %rbp
+  retq
+  .size foo, .-foo
+
+# PREAGG: B #foo# #Lret# 10 0
+# PREAGG: F #Lfalse# #Lret# 5
+
+# PREAGG-BID: B aabb1122:#foo# aabb1122:#Lret# 10 0
+# PREAGG-BID: F aabb1122:#Lfalse# aabb1122:#Lret# 5
+
+## Test 3: Executable with ".so" in the filename should NOT trigger filtering.
+## The old heuristic (ends_with(".so")) would false-positive here, but
+## identify_magic correctly identifies it as an executable.
+# RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe.so
+# RUN: perf2bolt %t.exe.so -o %t.fdata3 --pa -p %p/Inputs/pre-aggregated.txt \
+# RUN:   --profile-format=fdata 2>&1 | FileCheck %s --check-prefix=CHECK-EXE
+
+# CHECK-EXE-NOT: filtering pre-aggregated data for buildid
+# CHECK-EXE: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
+
+## Test 4: Shared library without ".so" in the filename should still trigger
+## filtering. The old heuristic would false-negative here, but identify_magic
+## correctly identifies it as a shared object.
+# RUN: cp %t.so %t.dynlib
+# RUN: perf2bolt %t.dynlib -o %t.fdata4 --pa -p %t.pa-bid \
+# RUN:   --profile-format=fdata 2>&1 | FileCheck %s --check-prefix=CHECK-NOEXT
+
+# CHECK-NOEXT: PERF2BOLT: filtering pre-aggregated data for buildid aabb1122
+# CHECK-NOEXT: BOLT-INFO: 1 out of {{[0-9]+}} functions in the binary
+# CHECK-NOEXT-NOT: (0.0%)

``````````

</details>


https://github.com/llvm/llvm-project/pull/190902


More information about the llvm-branch-commits mailing list