[llvm-branch-commits] [llvm] [BOLT] Use identify_magic for shared library detection (PR #190902)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Apr 7 20:52:58 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
Replace the fragile filename-based check (ends_with(".so")) with
identify_magic()/file_magic::elf_shared_object to reliably detect
shared libraries when filtering pre-aggregated profile data by
build ID.
Test Plan: pre-aggregated-perf-shlib.test
---
Full diff: https://github.com/llvm/llvm-project/pull/190902.diff
2 Files Affected:
- (modified) bolt/lib/Profile/DataAggregator.cpp (+5-1)
- (added) bolt/test/X86/pre-aggregated-perf-shlib.test (+70)
``````````diff
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 38a975e449a16..f39cdda086a50 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -22,6 +22,7 @@
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -394,7 +395,10 @@ void DataAggregator::parsePreAggregated() {
Line = 1;
// When processing a shared object, filter pre-aggregated entries by buildid.
- if (BC && !BC->HasFixedLoadAddress && BC->getFilename().ends_with(".so")) {
+ file_magic Magic;
+ if (BC && !BC->HasFixedLoadAddress &&
+ !identify_magic(BC->getFilename(), Magic) &&
+ Magic == file_magic::elf_shared_object) {
if (auto FileBID = BC->getFileBuildID()) {
FilterBuildID = *FileBID;
outs() << "PERF2BOLT: filtering pre-aggregated data for buildid "
diff --git a/bolt/test/X86/pre-aggregated-perf-shlib.test b/bolt/test/X86/pre-aggregated-perf-shlib.test
new file mode 100644
index 0000000000000..bd5c0cf8c2851
--- /dev/null
+++ b/bolt/test/X86/pre-aggregated-perf-shlib.test
@@ -0,0 +1,70 @@
+## Test that perf2bolt correctly detects shared objects using identify_magic
+## and activates build ID filtering for pre-aggregated profiles.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -shared -Wl,-q -Wl,--build-id=0xaabb1122 -nostdlib
+# RUN: link_fdata %s %t.so %t.pa PREAGG
+# RUN: link_fdata %s %t.so %t.pa-bid PREAGG-BID
+
+## Test 1: Without buildid prefix, addresses don't match the .so's buildid
+## and are treated as external → 0 functions with profile.
+# RUN: perf2bolt %t.so -o %t.fdata --pa -p %t.pa \
+# RUN: --profile-format=fdata 2>&1 | FileCheck %s --check-prefix=CHECK-NEG
+
+# CHECK-NEG: PERF2BOLT: filtering pre-aggregated data for buildid aabb1122
+# CHECK-NEG: BOLT-INFO: 0 out of {{[0-9]+}} functions in the binary (0.0%) have non-empty execution profile
+
+## Test 2: With matching buildid prefix, addresses are recognized.
+# RUN: perf2bolt %t.so -o %t.fdata2 --pa -p %t.pa-bid \
+# RUN: --profile-format=fdata 2>&1 | FileCheck %s --check-prefix=CHECK-POS
+
+# CHECK-POS: PERF2BOLT: filtering pre-aggregated data for buildid aabb1122
+# CHECK-POS: BOLT-INFO: 1 out of {{[0-9]+}} functions in the binary
+# CHECK-POS-NOT: (0.0%)
+
+ .text
+ .globl foo
+ .type foo, @function
+foo:
+ pushq %rbp
+ movq %rsp, %rbp
+ cmpl $0, %edi
+ je Lfalse
+Ltrue:
+ movl $1, %eax
+ jmp Lret
+Lfalse:
+ xorl %eax, %eax
+Lret:
+ popq %rbp
+ retq
+ .size foo, .-foo
+
+# PREAGG: B #foo# #Lret# 10 0
+# PREAGG: F #Lfalse# #Lret# 5
+
+# PREAGG-BID: B aabb1122:#foo# aabb1122:#Lret# 10 0
+# PREAGG-BID: F aabb1122:#Lfalse# aabb1122:#Lret# 5
+
+## Test 3: Executable with a filename ending in ".so" should NOT trigger
+## filtering. The old heuristic (ends_with(".so")) would produce a false
+## positive here, but identify_magic correctly identifies it as an executable.
+# RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe.so
+# RUN: perf2bolt %t.exe.so -o %t.fdata3 --pa -p %p/Inputs/pre-aggregated.txt \
+# RUN: --profile-format=fdata 2>&1 | FileCheck %s --check-prefix=CHECK-EXE
+
+# CHECK-EXE-NOT: filtering pre-aggregated data for buildid
+# CHECK-EXE: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
+
+## Test 4: Shared library whose filename does not end in ".so" should still
+## trigger filtering. The old heuristic would produce a false negative here,
+## but identify_magic correctly identifies it as a shared object.
+# RUN: cp %t.so %t.dynlib
+# RUN: perf2bolt %t.dynlib -o %t.fdata4 --pa -p %t.pa-bid \
+# RUN: --profile-format=fdata 2>&1 | FileCheck %s --check-prefix=CHECK-NOEXT
+
+# CHECK-NOEXT: PERF2BOLT: filtering pre-aggregated data for buildid aabb1122
+# CHECK-NOEXT: BOLT-INFO: 1 out of {{[0-9]+}} functions in the binary
+# CHECK-NOEXT-NOT: (0.0%)
``````````
</details>
https://github.com/llvm/llvm-project/pull/190902
More information about the llvm-branch-commits
mailing list