[llvm] [BOLT] Add pre-parsed perf script support (PR #163785)
Ádám Kallai via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 29 06:35:49 PST 2026
https://github.com/kaadam updated https://github.com/llvm/llvm-project/pull/163785
>From 3fd753d61ac27bcf99c744380ff9d0484655ca66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gergely=20B=C3=A1lint?= <gergely.balint at arm.com>
Date: Tue, 30 Jul 2024 14:38:20 +0200
Subject: [PATCH 1/8] Enable cross compilation of the runtime libs
This patch builds libbolt_rt_instr.a and libbolt_rt_hugify.a
for each target architecture. This enables using the --instrument
and --hugify options on binaries whose architecture differs from
that of the host.
The patch also changes the default locations searched for the libs.
[BOLT] remove cross-compilers from BOLT runtime build process
---
bolt/CMakeLists.txt | 131 ++++++++++++++----
bolt/cmake/test/test.cpp | 3 +
.../include/bolt/RuntimeLibs/RuntimeLibrary.h | 9 +-
bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp | 9 +-
.../InstrumentationRuntimeLibrary.cpp | 10 +-
bolt/lib/RuntimeLibs/RuntimeLibrary.cpp | 41 +++---
bolt/runtime/CMakeLists.txt | 14 +-
bolt/tools/driver/CMakeLists.txt | 4 +-
8 files changed, 160 insertions(+), 61 deletions(-)
create mode 100644 bolt/cmake/test/test.cpp
diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt
index 5c7d51e1e398c..1433cd8b044ee 100644
--- a/bolt/CMakeLists.txt
+++ b/bolt/CMakeLists.txt
@@ -89,8 +89,7 @@ if ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64"
OR CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64|aarch64)$"
OR CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64")
AND (CMAKE_SYSTEM_NAME STREQUAL "Linux"
- OR CMAKE_SYSTEM_NAME STREQUAL "Darwin")
- AND (NOT CMAKE_CROSSCOMPILING))
+ OR CMAKE_SYSTEM_NAME STREQUAL "Darwin"))
set(BOLT_ENABLE_RUNTIME_default ON)
endif()
option(BOLT_ENABLE_RUNTIME "Enable BOLT runtime" ${BOLT_ENABLE_RUNTIME_default})
@@ -140,36 +139,110 @@ if (LLVM_INCLUDE_TESTS)
endif()
endif()
-if (BOLT_ENABLE_RUNTIME)
- message(STATUS "Building BOLT runtime libraries for ${CMAKE_SYSTEM_PROCESSOR}")
- set(extra_args "")
- if(CMAKE_SYSROOT)
- list(APPEND extra_args -DCMAKE_SYSROOT=${CMAKE_SYSROOT})
+function(bolt_rt_target_supported target supported)
+
+ if(${target} STREQUAL ${HOST_NAME})
+ set(${supported} TRUE PARENT_SCOPE)
+ return()
+ elseif(${target} STREQUAL "X86")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --target=x86_64-linux-gnu")
+ elseif(${target} STREQUAL "AArch64")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --target=aarch64-linux-gnu")
+ elseif(${target} STREQUAL "RISCV")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --target=riscv64-linux-gnu")
+ endif()
+
+ try_compile(CROSS_COMP
+ ${CMAKE_BINARY_DIR}/CMakeFiles/CMakeScratch
+ SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test/test.cpp
+ CMAKE_FLAGS ""
+ TRY_COMP_OUTPUT)
+
+ if(CROSS_COMP)
+ message(STATUS "cross compilation test for ${target} was successful")
+ set(${supported} TRUE PARENT_SCOPE)
+ else()
+ message(STATUS "cross compilation test for ${target} was NOT successful")
+ set(${supported} FALSE PARENT_SCOPE)
+ endif()
+
+endfunction()
+
+if(BOLT_ENABLE_RUNTIME)
+ if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ set(HOST_NAME "X86")
+ elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+ set(HOST_NAME "AArch64")
+ elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64")
+ set(HOST_NAME "RISCV")
endif()
- include(ExternalProject)
- ExternalProject_Add(bolt_rt
- SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime"
- STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-stamps
- BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins
- CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
- -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
- -DCMAKE_BUILD_TYPE=Release
- -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
- -DLLVM_LIBDIR_SUFFIX=${LLVM_LIBDIR_SUFFIX}
- -DLLVM_LIBRARY_DIR=${LLVM_LIBRARY_DIR}
- -DBOLT_BUILT_STANDALONE=${BOLT_BUILT_STANDALONE}
- ${extra_args}
- INSTALL_COMMAND ""
- BUILD_ALWAYS True
+ # Further filter BOLT runtime targets: check if the runtime can be compiled
+ set(BOLT_RT_TARGETS_TO_BUILD)
+ if(CMAKE_C_COMPILER_ID MATCHES ".*Clang.*" AND CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*")
+ foreach(tgt ${BOLT_TARGETS_TO_BUILD})
+ bolt_rt_target_supported(${tgt} supported)
+ if(${supported})
+ list(APPEND BOLT_RT_TARGETS_TO_BUILD ${tgt})
+ endif()
+ endforeach()
+ else()
+ message(WARNING "BOLT runtime libraries require Clang")
+ endif()
+endif()
+
+if(BOLT_ENABLE_RUNTIME)
+
+ foreach(tgt ${BOLT_RT_TARGETS_TO_BUILD})
+ message(STATUS "Building BOLT runtime libraries for ${tgt}")
+ set(extra_args "")
+ if(CMAKE_SYSROOT)
+ list(APPEND extra_args -DCMAKE_SYSROOT=${CMAKE_SYSROOT})
+ endif()
+
+ # set up paths: target-specific libs will be generated under lib/${tgt}/
+ set(BOLT_RT_LIBRARY_DIR "${LLVM_LIBRARY_DIR}")
+ set(SUBDIR "${tgt}")
+ cmake_path(APPEND BOLT_RT_LIBRARY_DIR ${BOLT_RT_LIBRARY_DIR} ${SUBDIR})
+ file(MAKE_DIRECTORY ${BOLT_RT_LIBRARY_DIR})
+
+ if(${tgt} STREQUAL ${HOST_NAME})
+ set(BOLT_RT_FLAGS)
+ elseif(${tgt} STREQUAL "AArch64")
+ set(BOLT_RT_FLAGS "--target=aarch64-linux-gnu")
+ elseif(${tgt} STREQUAL "X86")
+ set(BOLT_RT_FLAGS "--target=x86_64-linux-gnu")
+ elseif(${tgt} STREQUAL "RISCV")
+ set(BOLT_RT_FLAGS "--target=riscv64-linux-gnu")
+ endif()
+
+ include(ExternalProject)
+ ExternalProject_Add(bolt_rt_${tgt}
+ SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime"
+ STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-${tgt}-stamps
+ BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-${tgt}-bins
+ CMAKE_ARGS
+ -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+ -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
+ -DCMAKE_BUILD_TYPE=Release
+ -DBOLT_RT_FLAGS=${BOLT_RT_FLAGS}
+ -DBOLT_RT_TARGET=${tgt}
+ -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
+ -DLLVM_LIBDIR_SUFFIX=${LLVM_LIBDIR_SUFFIX}
+ -DLLVM_LIBRARY_DIR=${BOLT_RT_LIBRARY_DIR}
+ -DBOLT_BUILT_STANDALONE=${BOLT_BUILT_STANDALONE}
+ ${extra_args}
+ INSTALL_COMMAND ""
+ BUILD_ALWAYS True
)
- install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -P ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins/cmake_install.cmake \)"
- COMPONENT bolt)
- add_llvm_install_targets(install-bolt_rt
- DEPENDS bolt_rt bolt
- COMPONENT bolt)
- set(LIBBOLT_RT_INSTR "${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins/lib${LLVM_LIBDIR_SUFFIX}/libbolt_rt_instr.a")
- set(LIBBOLT_RT_HUGIFY "${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins/lib${LLVM_LIBDIR_SUFFIX}/libbolt_rt_hugify.a")
+ install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -P ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-${tgt}-bins/cmake_install.cmake \)"
+ COMPONENT bolt)
+ add_llvm_install_targets(install-bolt_rt_${tgt}
+ DEPENDS bolt_rt_${tgt} bolt
+ COMPONENT bolt)
+ set(LIBBOLT_RT_INSTR "${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-${tgt}-bins/lib/libbolt_rt_instr.a")
+ set(LIBBOLT_RT_HUGIFY "${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-${tgt}-bins/lib/libbolt_rt_hugify.a")
+ endforeach()
endif()
find_program(GNU_LD_EXECUTABLE NAMES ${LLVM_DEFAULT_TARGET_TRIPLE}-ld.bfd ld.bfd DOC "GNU ld")
diff --git a/bolt/cmake/test/test.cpp b/bolt/cmake/test/test.cpp
new file mode 100644
index 0000000000000..4e43f4be13959
--- /dev/null
+++ b/bolt/cmake/test/test.cpp
@@ -0,0 +1,3 @@
+#include <stdio.h>
+
+int main() { return 0; }
diff --git a/bolt/include/bolt/RuntimeLibs/RuntimeLibrary.h b/bolt/include/bolt/RuntimeLibs/RuntimeLibrary.h
index fc1db7369eb4a..e757bb4b848b9 100644
--- a/bolt/include/bolt/RuntimeLibs/RuntimeLibrary.h
+++ b/bolt/include/bolt/RuntimeLibs/RuntimeLibrary.h
@@ -61,14 +61,19 @@ class RuntimeLibrary {
/// Get the full path to a runtime library specified by \p LibFileName and \p
/// ToolPath.
static std::string getLibPathByToolPath(StringRef ToolPath,
+ StringRef ToolSubPath,
StringRef LibFileName);
+ /// Create architecture-specific ToolSubPath to be used in the full path
+ static std::string createToolSubPath(StringRef ArchName);
+
/// Get the full path to a runtime library by the install directory.
- static std::string getLibPathByInstalled(StringRef LibFileName);
+ static std::string getLibPathByInstalled(StringRef ToolSubPath, StringRef LibFileName);
/// Gets the full path to a runtime library based on whether it exists
/// in the install libdir or runtime libdir.
- static std::string getLibPath(StringRef ToolPath, StringRef LibFileName);
+ static std::string getLibPath(StringRef ToolPath, StringRef ToolSubPath,
+ StringRef LibFileName);
/// Load a static runtime library specified by \p LibPath.
static void loadLibrary(StringRef LibPath, BOLTLinker &Linker,
diff --git a/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp
index 059b1239d806b..f2fd279d5f6f0 100644
--- a/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp
+++ b/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp
@@ -63,7 +63,14 @@ void HugifyRuntimeLibrary::link(BinaryContext &BC, StringRef ToolPath,
BOLTLinker &Linker,
BOLTLinker::SectionsMapper MapSections) {
- std::string LibPath = getLibPath(ToolPath, opts::RuntimeHugifyLib);
+ // If the default filename is selected, add architecture-specific Target
+ // subdirectory to it.
+ std::string ToolSubPath = "";
+ if (opts::RuntimeHugifyLib == "libbolt_rt_hugify.a") {
+ ToolSubPath = createToolSubPath(BC.TheTriple->getArchName().str().c_str());
+ }
+ std::string LibPath =
+ getLibPath(ToolPath, ToolSubPath, opts::RuntimeHugifyLib);
loadLibrary(LibPath, Linker, MapSections);
assert(!RuntimeStartAddress &&
diff --git a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp
index bca3c440275f7..3cb698b577a22 100644
--- a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp
+++ b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp
@@ -199,7 +199,15 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
void InstrumentationRuntimeLibrary::link(
BinaryContext &BC, StringRef ToolPath, BOLTLinker &Linker,
BOLTLinker::SectionsMapper MapSections) {
- std::string LibPath = getLibPath(ToolPath, opts::RuntimeInstrumentationLib);
+
+ // If the default filename is selected, add architecture-specific Target
+ // subdirectory to it.
+ std::string ToolSubPath = "";
+ if (opts::RuntimeInstrumentationLib == "libbolt_rt_instr.a") {
+ ToolSubPath = createToolSubPath(BC.TheTriple->getArchName().str().c_str());
+ }
+ std::string LibPath =
+ getLibPath(ToolPath, ToolSubPath, opts::RuntimeInstrumentationLib);
loadLibrary(LibPath, Linker, MapSections);
if (BC.isMachO())
diff --git a/bolt/lib/RuntimeLibs/RuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/RuntimeLibrary.cpp
index 98852ee691ceb..0dbd9b06a5ff5 100644
--- a/bolt/lib/RuntimeLibs/RuntimeLibrary.cpp
+++ b/bolt/lib/RuntimeLibs/RuntimeLibrary.cpp
@@ -28,6 +28,7 @@ using namespace bolt;
void RuntimeLibrary::anchor() {}
std::string RuntimeLibrary::getLibPathByToolPath(StringRef ToolPath,
+ StringRef ToolSubPath,
StringRef LibFileName) {
StringRef Dir = llvm::sys::path::parent_path(ToolPath);
SmallString<128> LibPath = llvm::sys::path::parent_path(Dir);
@@ -38,45 +39,45 @@ std::string RuntimeLibrary::getLibPathByToolPath(StringRef ToolPath,
LibPath = llvm::sys::path::parent_path(llvm::sys::path::parent_path(Dir));
llvm::sys::path::append(LibPath, "lib" LLVM_LIBDIR_SUFFIX);
}
+ llvm::sys::path::append(LibPath, ToolSubPath);
llvm::sys::path::append(LibPath, LibFileName);
- if (!llvm::sys::fs::exists(LibPath)) {
- // If it is a symlink, check the directory that the symlink points to.
- if (llvm::sys::fs::is_symlink_file(ToolPath)) {
- SmallString<256> RealPath;
- llvm::sys::fs::real_path(ToolPath, RealPath);
- if (llvm::ErrorOr<std::string> P =
- llvm::sys::findProgramByName(RealPath)) {
- outs() << "BOLT-INFO: library not found: " << LibPath << "\n"
- << "BOLT-INFO: " << ToolPath << " is a symlink; will look up "
- << LibFileName
- << " at the target directory that the symlink points to\n";
- return getLibPath(*P, LibFileName);
- }
- }
- errs() << "BOLT-ERROR: library not found: " << LibPath << "\n";
- exit(1);
- }
return std::string(LibPath);
}
-std::string RuntimeLibrary::getLibPathByInstalled(StringRef LibFileName) {
+std::string RuntimeLibrary::createToolSubPath(StringRef ArchName) {
+ std::string ToolSubPath = "";
+ if (ArchName == "x86_64")
+ ToolSubPath = "X86";
+ else if (ArchName == "aarch64")
+ ToolSubPath = "AArch64";
+ else if (ArchName == "riscv64")
+ ToolSubPath = "RISCV";
+ else
+ llvm_unreachable("Unsupported architecture");
+
+ return ToolSubPath;
+}
+
+std::string RuntimeLibrary::getLibPathByInstalled(StringRef ToolSubPath, StringRef LibFileName) {
SmallString<128> LibPath(CMAKE_INSTALL_FULL_LIBDIR);
+ llvm::sys::path::append(LibPath, ToolSubPath);
llvm::sys::path::append(LibPath, LibFileName);
return std::string(LibPath);
}
std::string RuntimeLibrary::getLibPath(StringRef ToolPath,
+ StringRef ToolSubPath,
StringRef LibFileName) {
if (llvm::sys::fs::exists(LibFileName)) {
return std::string(LibFileName);
}
- std::string ByTool = getLibPathByToolPath(ToolPath, LibFileName);
+ std::string ByTool = getLibPathByToolPath(ToolPath, ToolSubPath, LibFileName);
if (llvm::sys::fs::exists(ByTool)) {
return ByTool;
}
- std::string ByInstalled = getLibPathByInstalled(LibFileName);
+ std::string ByInstalled = getLibPathByInstalled(ToolSubPath, LibFileName);
if (llvm::sys::fs::exists(ByInstalled)) {
return ByInstalled;
}
diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt
index 63f178bd263c2..bd8828afd4b59 100644
--- a/bolt/runtime/CMakeLists.txt
+++ b/bolt/runtime/CMakeLists.txt
@@ -29,8 +29,8 @@ if(NOT BOLT_BUILT_STANDALONE)
add_custom_command(TARGET bolt_rt_hugify POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/libbolt_rt_hugify.a" "${LLVM_LIBRARY_DIR}")
endif()
-
-set(BOLT_RT_FLAGS
+# In case of compiling with clang, the '--target' option is passed in BOLT_RT_FLAGS.
+set(BOLT_RT_FLAGS ${BOLT_RT_FLAGS}
-ffreestanding
-fno-exceptions
-fno-rtti
@@ -41,15 +41,15 @@ set(BOLT_RT_FLAGS
# Refs: llvm/llvm-project#148595 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77882
-fomit-frame-pointer
)
-if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+if (${BOLT_RT_TARGET} STREQUAL "x86_64")
set(BOLT_RT_FLAGS ${BOLT_RT_FLAGS}
-mno-sse
-mgeneral-regs-only)
endif()
-if (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
+if (${BOLT_RT_TARGET} STREQUAL "riscv64")
set(BOLT_RT_FLAGS ${BOLT_RT_FLAGS})
endif()
-if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+if(${BOLT_RT_TARGET} STREQUAL "AArch64")
check_cxx_compiler_flag("-mno-outline-atomics" CXX_SUPPORTS_OUTLINE_ATOMICS)
if (CXX_SUPPORTS_OUTLINE_ATOMICS)
set(BOLT_RT_FLAGS ${BOLT_RT_FLAGS}
@@ -64,8 +64,8 @@ target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS})
target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
-install(TARGETS bolt_rt_instr DESTINATION "lib${LLVM_LIBDIR_SUFFIX}")
-install(TARGETS bolt_rt_hugify DESTINATION "lib${LLVM_LIBDIR_SUFFIX}")
+install(TARGETS bolt_rt_instr DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}/${BOLT_RT_TARGET}")
+install(TARGETS bolt_rt_hugify DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}/${BOLT_RT_TARGET}")
if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*" AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
add_library(bolt_rt_instr_osx STATIC
diff --git a/bolt/tools/driver/CMakeLists.txt b/bolt/tools/driver/CMakeLists.txt
index 4b3c7416de974..eb2107e1d7c9c 100644
--- a/bolt/tools/driver/CMakeLists.txt
+++ b/bolt/tools/driver/CMakeLists.txt
@@ -6,7 +6,9 @@ set(LLVM_LINK_COMPONENTS
)
if (BOLT_ENABLE_RUNTIME)
- set(BOLT_DRIVER_DEPS "bolt_rt")
+ foreach(tgt ${BOLT_RT_TARGETS_TO_BUILD})
+ set(BOLT_DRIVER_DEPS ${BOLT_DRIVER_DEPS} "bolt_rt_${tgt}")
+ endforeach()
else()
set(BOLT_DRIVER_DEPS "")
endif()
>From d4f1f4c3bfd4de68111398fa01ba0f2e6a1dbd48 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Mon, 8 Dec 2025 11:28:43 +0100
Subject: [PATCH 2/8] Spawn buildid-list perf job at start
---
bolt/include/bolt/Profile/DataAggregator.h | 1 +
bolt/lib/Profile/DataAggregator.cpp | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index db0f6903185b7..60f2962423e94 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -158,6 +158,7 @@ class DataAggregator : public DataReader {
};
/// Process info for spawned processes
+ PerfProcessInfo BuildIDProcessInfo;
PerfProcessInfo MainEventsPPI;
PerfProcessInfo MemEventsPPI;
PerfProcessInfo MMapEventsPPI;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index cce08658fefb9..dbfe7f524090b 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -233,6 +233,8 @@ void DataAggregator::start() {
launchPerfProcess("task events", TaskEventsPPI,
"script --show-task-events --no-itrace");
+
+ launchPerfProcess("buildid list", BuildIDProcessInfo, "buildid-list");
}
void DataAggregator::abort() {
@@ -303,8 +305,6 @@ void DataAggregator::processFileBuildID(StringRef FileBuildID) {
errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
};
- PerfProcessInfo BuildIDProcessInfo;
- launchPerfProcess("buildid list", BuildIDProcessInfo, "buildid-list");
if (prepareToParse("buildid", BuildIDProcessInfo, WarningCallback))
return;
>From 667bfe7b4455d017e62fec8fa4c38ee663c97300 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Mon, 8 Dec 2025 12:22:04 +0100
Subject: [PATCH 3/8] Add support to generate pre-parsed perf data
The generator relies on the aggregator to spawn the required
perf-script jobs based on the aggregation type, and merges
the results of the perf-script jobs into a single file.
This hybrid profile will contain all required events such as BuildID,
MMAP, TASK, Branch/BrStack, or Memory event for the aggregation.
The generator also creates a file header, where these events
are listed along with the length information of their contents.
To generate pre-parsed perf data as an input for BasicAggregation:
`perf2bolt -p perf.data -o perf.text --ba --generate-perf-text-data`
Or for SPE BranchAggregation:
`perf2bolt -p perf.data -o perf.text --spe --generate-perf-text-data`
The results are placed into the perf.text output file.
---
bolt/include/bolt/Profile/DataAggregator.h | 70 ++++++++++++++++++++++
bolt/lib/Profile/DataAggregator.cpp | 69 ++++++++++++++++++++-
2 files changed, 138 insertions(+), 1 deletion(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 60f2962423e94..ad965a39402a9 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -149,8 +149,24 @@ class DataAggregator : public DataReader {
/// Perf utility full path name
std::string PerfPath;
+ enum PerfProcessType {
+ BUILDIDS = 0,
+ MAIN_EVENTS,
+ MEN_EVENTS,
+ MMAP_EVENTS,
+ TASK_EVENTS
+ };
+ friend raw_ostream &operator<<(raw_ostream &OS, const PerfProcessType &T);
+
/// Perf process spawning bookkeeping
struct PerfProcessInfo {
+ static constexpr StringLiteral BuildIDEventStr = "BUILDIDS";
+ static constexpr StringLiteral MainEventStr = "MAIN";
+ static constexpr StringLiteral MemEventStr = "MEM";
+ static constexpr StringLiteral MMapEventStr = "MMAP";
+ static constexpr StringLiteral TaskEventsStr = "TASK";
+
+ enum PerfProcessType Type;
bool IsFinished{false};
sys::ProcessInfo PI;
SmallVector<char, 256> StdoutPath;
@@ -239,6 +255,9 @@ class DataAggregator : public DataReader {
/// parsing.
void launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, StringRef Args);
+ /// Helps to generate pre-parsed perf text profile.
+ uint64_t getFileSize(const StringRef File);
+
/// Delete all temporary files created to hold the output generated by spawned
/// subprocesses during the aggregation job
void deleteTempFiles();
@@ -445,6 +464,35 @@ class DataAggregator : public DataReader {
/// an external tool.
std::error_code parsePreAggregatedLBRSamples();
+ /// Dump pre-parsed perf profile data into a single file.
+ /// The generator relies on the aggregator work to spawn the required
+ /// perf-script jobs based on the the aggregation type, and merges
+ /// the results of the pref-script jobs into a single file.
+ /// This hybrid profile contains all required events such as BuildID,
+ /// MMAP, TASK, Branch/BrStack, or Memory for the aggregation.
+ /// The generator also creates a file header, where these events
+ /// are listed along with the length information of their contents.
+ /// This is how a pre-parsed profile data looks like for Basic Aggregation:
+ ///
+ /// perf2bolt -p perf.data -o perf.text --ba --generate-perf-text-data
+ ///
+ /// PERFTEXT BUILDIDS=55;MMAP=2523121;MAIN=6426;TASK=352203;
+ /// 68c3da33ca43d5a74d501b5ea0012f782e04096e /example/bin1
+ /// c3a8496f2347b468a54a21072dc6cde7f0d88c6c /example/bin2
+ /// ...
+ /// bin1 20470 ... PERF_RECORD_MMAP2 20470/20470: ... r-xp /example/bin1
+ /// bin1 20470 ... PERF_RECORD_MMAP2 20470/20470: ... r-xp [vdso]
+ /// ...
+ /// bin1 20470 ... PERF_RECORD_COMM exec: bin1:20470/20470
+ /// bin1 20470 ... PERF_RECORD_EXIT(20470:20470):(20469:20469)
+ /// ...
+ /// 20470 branch: ffffffd1a4764d04 ffffffd1a4764cfc
+ /// 20470 branch: ffffffd1a44777f4 ffffffd1a4fc8af0
+ /// 20470 branch: ffffffd1a477cd14 ffffffd1a477cd00
+ /// 20470 branch: ffffffd1a4400f58 ffffffd1a4400f7c
+ /// ...
+ void generatePerfTextData();
+
/// If \p Address falls into the binary address space based on memory
/// mapping info \p MMI, then adjust it for further processing by subtracting
/// the base load address. External addresses, i.e. addresses that do not
@@ -595,6 +643,28 @@ inline raw_ostream &operator<<(raw_ostream &OS,
OS << " ... " << Twine::utohexstr(T.To);
return OS;
}
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+ const DataAggregator::PerfProcessType &T) {
+ switch (T) {
+ case DataAggregator::PerfProcessType::BUILDIDS:
+ OS << DataAggregator::PerfProcessInfo::BuildIDEventStr;
+ break;
+ case DataAggregator::PerfProcessType::MAIN_EVENTS:
+ OS << DataAggregator::PerfProcessInfo::MainEventStr;
+ break;
+ case DataAggregator::PerfProcessType::MEN_EVENTS:
+ OS << DataAggregator::PerfProcessInfo::MemEventStr;
+ break;
+ case DataAggregator::PerfProcessType::MMAP_EVENTS:
+ OS << DataAggregator::PerfProcessInfo::MMapEventStr;
+ break;
+ case DataAggregator::PerfProcessType::TASK_EVENTS:
+ OS << DataAggregator::PerfProcessInfo::TaskEventsStr;
+ break;
+ }
+ return OS;
+}
} // namespace bolt
} // namespace llvm
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index dbfe7f524090b..9f7bdb8f99dd4 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -127,6 +127,11 @@ cl::opt<std::string>
"perf-script output in a textual format"),
cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory));
+cl::opt<bool> GeneratePerfTextProfile(
+ "generate-perf-text-data",
+ cl::desc("Dump perf-script jobs' output into output file"), cl::Hidden,
+ cl::cat(AggregatorCategory));
+
static cl::opt<bool>
TimeAggregator("time-aggr",
cl::desc("time BOLT aggregator"),
@@ -141,6 +146,8 @@ namespace {
const char TimerGroupName[] = "aggregator";
const char TimerGroupDesc[] = "Aggregator";
+constexpr const StringLiteral PerfTextMagicStr = "PERFTEXT";
+
std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
std::vector<SectionNameAndRange> sections;
for (BinarySection &Section : BC->sections()) {
@@ -169,6 +176,17 @@ void deleteTempFile(const std::string &FileName) {
}
}
+uint64_t DataAggregator::getFileSize(const StringRef File) {
+ uint64_t Size;
+ std::error_code EC = sys::fs::file_size(File, Size);
+ if (EC) {
+ errs() << "unable to obtain file size: " << EC.message() << "\n";
+ deleteTempFiles();
+ exit(1);
+ }
+ return Size;
+}
+
void DataAggregator::deleteTempFiles() {
for (std::string &FileName : TempFiles)
deleteTempFile(FileName);
@@ -382,6 +400,53 @@ void DataAggregator::parsePreAggregated() {
}
}
+void DataAggregator::generatePerfTextData() {
+ std::error_code EC;
+ raw_fd_ostream OutFile(opts::OutputFilename, EC, sys::fs::OpenFlags::OF_None);
+ if (EC) {
+ errs() << "error opening output file: " << EC.message() << "\n";
+ deleteTempFiles();
+ exit(1);
+ }
+
+ SmallVector<PerfProcessInfo *, 5> ProcessInfos = {
+ &BuildIDProcessInfo, &MMapEventsPPI, &MainEventsPPI, &TaskEventsPPI};
+ if (opts::ParseMemProfile)
+ ProcessInfos.push_back(&MemEventsPPI);
+
+ // Create a file header as a table of the contents
+ // PERFTEXT;EVENT1={$SIZE};EVENT2={$SIZE}...
+ OutFile << PerfTextMagicStr << ";";
+ for (const auto PPI : ProcessInfos) {
+ std::string Error;
+ sys::Wait(PPI->PI, std::nullopt, &Error);
+ if (!Error.empty()) {
+ errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
+ deleteTempFiles();
+ exit(1);
+ }
+ uint64_t FS = getFileSize(PPI->StdoutPath.data());
+ OutFile << PPI->Type << "=" << FS << ";";
+ }
+ OutFile << "\n";
+
+ // Merge all perf-scripts jobs' output into the single OutputFile
+ for (const auto PPI : ProcessInfos) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+ MemoryBuffer::getFileOrSTDIN(PPI->StdoutPath.data());
+ if (std::error_code EC = MB.getError()) {
+ errs() << "Cannot open " << PPI->StdoutPath.data() << ": " << EC.message()
+ << "\n";
+ deleteTempFiles();
+ exit(1);
+ }
+ OutFile << (*MB)->getBuffer();
+ }
+ OutFile.close();
+ deleteTempFiles();
+ exit(0);
+}
+
void DataAggregator::filterBinaryMMapInfo() {
if (opts::FilterPID) {
auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
@@ -594,7 +659,9 @@ void DataAggregator::imputeFallThroughs() {
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;
- if (opts::ReadPreAggregated) {
+ if (opts::GeneratePerfTextProfile) {
+ generatePerfTextData();
+ } else if (opts::ReadPreAggregated) {
parsePreAggregated();
} else {
parsePerfData(BC);
>From fea78b43ee2a8e7a574bf6384f3c610c2a875e30 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Mon, 19 Jan 2026 16:25:07 +0100
Subject: [PATCH 4/8] Address reviewers 1
---
bolt/include/bolt/Profile/DataAggregator.h | 62 +++++++++++++---------
bolt/lib/Profile/DataAggregator.cpp | 37 ++++++++-----
2 files changed, 62 insertions(+), 37 deletions(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index ad965a39402a9..a56d7a91f6f95 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -152,7 +152,7 @@ class DataAggregator : public DataReader {
enum PerfProcessType {
BUILDIDS = 0,
MAIN_EVENTS,
- MEN_EVENTS,
+ MEM_EVENTS,
MMAP_EVENTS,
TASK_EVENTS
};
@@ -168,17 +168,17 @@ class DataAggregator : public DataReader {
enum PerfProcessType Type;
bool IsFinished{false};
- sys::ProcessInfo PI;
- SmallVector<char, 256> StdoutPath;
- SmallVector<char, 256> StderrPath;
+ sys::ProcessInfo PI{};
+ SmallVector<char, 256> StdoutPath{};
+ SmallVector<char, 256> StderrPath{};
};
/// Process info for spawned processes
- PerfProcessInfo BuildIDProcessInfo;
- PerfProcessInfo MainEventsPPI;
- PerfProcessInfo MemEventsPPI;
- PerfProcessInfo MMapEventsPPI;
- PerfProcessInfo TaskEventsPPI;
+ PerfProcessInfo BuildIDProcessInfo = {PerfProcessType::BUILDIDS};
+ PerfProcessInfo MainEventsPPI = {PerfProcessType::MAIN_EVENTS};
+ PerfProcessInfo MemEventsPPI = {PerfProcessType::MEM_EVENTS};
+ PerfProcessInfo MMapEventsPPI = {PerfProcessType::MMAP_EVENTS};
+ PerfProcessInfo TaskEventsPPI = {PerfProcessType::TASK_EVENTS};
/// Kernel VM starts at fixed based address
/// https://www.kernel.org/doc/Documentation/x86/x86_64/mm.txt
@@ -467,29 +467,41 @@ class DataAggregator : public DataReader {
/// Dump pre-parsed perf profile data into a single file.
/// The generator relies on the aggregator work to spawn the required
/// perf-script jobs based on the the aggregation type, and merges
- /// the results of the pref-script jobs into a single file.
+ /// their results into a single file.
/// This hybrid profile contains all required events such as BuildID,
- /// MMAP, TASK, Branch/BrStack, or Memory for the aggregation.
+ /// MMAP, TASK, MAIN (brstack or basic samples), or MEM for the aggregation.
/// The generator also creates a file header, where these events
/// are listed along with the length information of their contents.
- /// This is how a pre-parsed profile data looks like for Basic Aggregation:
+ /// The given length numbers in the header are in bytes, they are used
+ /// as an offset int the pre-parsed profile.
+ /// Some of these events are essential to be presented in the file.
+ /// Please see a short summary below:
+ /// MEM: Optional. Parsing memory profile is enabled by default, unless
+ /// '--itrace' aggregation is set (like Arm SPE). In the latter case
+ /// MEM profile won't be added into the pre-parsed profile.
+ /// MMAP: Compulsory, the mmap data is required to be in the file.
+ /// BUILDID: Ignored (you should use --ignore-build-id),
+ /// if buildid information doesn't exist in the input profile.
+ /// TASK: When task related data exists in the input profile,
+ /// Perf2bolt will always parse it.
+ /// MAIN: Compulsory; the MAIN events always have to be represented in the
+ /// file. Main events could be either 'brstack' or 'basic' sample data
+ /// based on how it was collected by Linux Perf.
///
- /// perf2bolt -p perf.data -o perf.text --ba --generate-perf-text-data
+ /// Example how you can generate pre-parsed profile for 'basic' aggregation:
+ /// perf2bolt -p perf.data BINARY -o perf.text --ba --generate-perf-text-data
///
- /// PERFTEXT BUILDIDS=55;MMAP=2523121;MAIN=6426;TASK=352203;
- /// 68c3da33ca43d5a74d501b5ea0012f782e04096e /example/bin1
- /// c3a8496f2347b468a54a21072dc6cde7f0d88c6c /example/bin2
+ /// This is how a pre-parsed profile data looks like for Basic Aggregation:
+ /// PERFTEXT;BUILDIDS=50;MMAP=3000000;MAIN=5000;TASK=350000;
+ /// abcd1234 /example/bin1
/// ...
- /// bin1 20470 ... PERF_RECORD_MMAP2 20470/20470: ... r-xp /example/bin1
- /// bin1 20470 ... PERF_RECORD_MMAP2 20470/20470: ... r-xp [vdso]
+ /// bin1 1234 ... PERF_RECORD_MMAP2 1234/1234: ... r-xp /example/bin1
/// ...
- /// bin1 20470 ... PERF_RECORD_COMM exec: bin1:20470/20470
- /// bin1 20470 ... PERF_RECORD_EXIT(20470:20470):(20469:20469)
+ /// bin1 1234 ... PERF_RECORD_COMM exec: bin1:1234/1234
+ /// bin1 1234 ... PERF_RECORD_EXIT(1234:1234):(20469:20469)
/// ...
- /// 20470 branch: ffffffd1a4764d04 ffffffd1a4764cfc
- /// 20470 branch: ffffffd1a44777f4 ffffffd1a4fc8af0
- /// 20470 branch: ffffffd1a477cd14 ffffffd1a477cd00
- /// 20470 branch: ffffffd1a4400f58 ffffffd1a4400f7c
+ /// 1234 branch: ffffffd1a4764d04 ffffffd1a4764cfc
+ /// 1234 branch: ffffffd1a44777f4 ffffffd1a4fc8af0
/// ...
void generatePerfTextData();
@@ -653,7 +665,7 @@ inline raw_ostream &operator<<(raw_ostream &OS,
case DataAggregator::PerfProcessType::MAIN_EVENTS:
OS << DataAggregator::PerfProcessInfo::MainEventStr;
break;
- case DataAggregator::PerfProcessType::MEN_EVENTS:
+ case DataAggregator::PerfProcessType::MEM_EVENTS:
OS << DataAggregator::PerfProcessInfo::MemEventStr;
break;
case DataAggregator::PerfProcessType::MMAP_EVENTS:
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 9f7bdb8f99dd4..d4f160e6f5862 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -323,6 +323,7 @@ void DataAggregator::processFileBuildID(StringRef FileBuildID) {
errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
};
+ // Extract buildid information from the input profile in the start() function.
if (prepareToParse("buildid", BuildIDProcessInfo, WarningCallback))
return;
@@ -414,35 +415,47 @@ void DataAggregator::generatePerfTextData() {
if (opts::ParseMemProfile)
ProcessInfos.push_back(&MemEventsPPI);
- // Create a file header as a table of the contents
- // PERFTEXT;EVENT1={$SIZE};EVENT2={$SIZE}...
- OutFile << PerfTextMagicStr << ";";
+ // Create a file header as a table of the contents.
+ // Initially pre-allocate enough space for the header at the beginning of
+ // the file.
+ // The header can be at most 132 characters long; this number is
+ // pre-calculated as the sum of the lengths of the magic strings, event
+ // names, their sizes, and the field separators.
+ // PERFTEXT;EVENT1={0x$SIZE};EVENT2={0x$SIZE}...
+ // The size of the events are printed in hex format (16 width) in order to be
+ // predictable fixed length.
+ OutFile << std::string(132, ' ') << "\n";
+ std::string Header;
+ raw_string_ostream SS(Header);
+ SS << PerfTextMagicStr << ";";
for (const auto PPI : ProcessInfos) {
std::string Error;
+ auto PathData = PPI->StdoutPath.data();
sys::Wait(PPI->PI, std::nullopt, &Error);
if (!Error.empty()) {
errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
deleteTempFiles();
exit(1);
}
- uint64_t FS = getFileSize(PPI->StdoutPath.data());
- OutFile << PPI->Type << "=" << FS << ";";
- }
- OutFile << "\n";
- // Merge all perf-scripts jobs' output into the single OutputFile
- for (const auto PPI : ProcessInfos) {
+ SS << PPI->Type << formatv("={0:x16};", getFileSize(PathData));
+
+ // Merge all perf-scripts jobs' output into the single OutputFile
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
- MemoryBuffer::getFileOrSTDIN(PPI->StdoutPath.data());
+ MemoryBuffer::getFileOrSTDIN(PathData);
if (std::error_code EC = MB.getError()) {
- errs() << "Cannot open " << PPI->StdoutPath.data() << ": " << EC.message()
- << "\n";
+ errs() << "Cannot open " << PathData << ": " << EC.message() << "\n";
deleteTempFiles();
exit(1);
}
OutFile << (*MB)->getBuffer();
}
+
+ OutFile.seek(0);
+ OutFile << Header;
OutFile.close();
+ outs() << "PERF2BOLT: Profile is saved to file " << opts::OutputFilename
+ << "\n";
deleteTempFiles();
exit(0);
}
>From d5d44a7f81cb13f5da724a54933f5c9fd5f3bdf1 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Wed, 21 Jan 2026 16:24:20 +0100
Subject: [PATCH 5/8] Update the description
---
bolt/include/bolt/Profile/DataAggregator.h | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index a56d7a91f6f95..bda3ec5099fca 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -477,12 +477,13 @@ class DataAggregator : public DataReader {
/// Some of these events are essential to be presented in the file.
/// Please see a short summary below:
/// MEM: Optional. Parsing memory profile is enabled by default, unless
- /// '--itrace' aggregation is set (like Arm SPE). In the latter case
- /// MEM profile won't be added into the pre-parsed profile.
+ /// '--itrace' aggregation is set. In the latter case MEM profile
+ /// won't be added into the pre-parsed profile. Note that currently
+ /// mem events are only supported if they were gathered on X86_64.
/// MMAP: Compulsory, the mmap data is required to be in the file.
/// BUILDID: Ignored (you should use --ignore-build-id),
/// if buildid information doesn't exist in the input profile.
- /// TASK: When task related data exists in the input profile,
+ /// TASK: If task related data exists in the input profile,
/// Perf2bolt will always parse it.
/// MAIN: Compulsory; the MAIN events always have to be represented in the
/// file. Main events could be either 'brstack' or 'basic' sample data
@@ -492,7 +493,8 @@ class DataAggregator : public DataReader {
/// perf2bolt -p perf.data BINARY -o perf.text --ba --generate-perf-text-data
///
/// This is how a pre-parsed profile data looks like for Basic Aggregation:
- /// PERFTEXT;BUILDIDS=50;MMAP=3000000;MAIN=5000;TASK=350000;
+ /// PERFTEXT;BUILDIDS=0x0000000000000032;MMAP=0x000000000002DC6C0;MAIN=0x00000000000001388;
+ /// TASK=0x00000000000055730;MEM=0x0000000000000128;
/// abcd1234 /example/bin1
/// ...
/// bin1 1234 ... PERF_RECORD_MMAP2 1234/1234: ... r-xp /example/bin1
@@ -500,9 +502,11 @@ class DataAggregator : public DataReader {
/// bin1 1234 ... PERF_RECORD_COMM exec: bin1:1234/1234
/// bin1 1234 ... PERF_RECORD_EXIT(1234:1234):(20469:20469)
/// ...
- /// 1234 branch: ffffffd1a4764d04 ffffffd1a4764cfc
- /// 1234 branch: ffffffd1a44777f4 ffffffd1a4fc8af0
+ /// 1234 branch: abcd1234 abcd1237
+ /// 1234 branch: abcd5678 abce9876
/// ...
+ /// 1234 mem-loads: efgh1234 efgh1234
+ /// 1234 mem-loads: efgh4567 efgh8910
void generatePerfTextData();
/// If \p Address falls into the binary address space based on memory
>From d394335fbd33cc5c2f57f919103143b4c7e7fd3b Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Mon, 26 Jan 2026 16:49:01 +0100
Subject: [PATCH 6/8] Print hex without 0x prefix
---
bolt/lib/Profile/DataAggregator.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index d4f160e6f5862..1d84911993e37 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -438,7 +438,7 @@ void DataAggregator::generatePerfTextData() {
exit(1);
}
- SS << PPI->Type << formatv("={0:x16};", getFileSize(PathData));
+ SS << PPI->Type << formatv("={0:x-16};", getFileSize(PathData));
// Merge all perf-scripts jobs' output into the single OutputFile
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
>From c7812ec2c63ded55f3a43ecfac1fddc6252a6d09 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Mon, 8 Dec 2025 16:47:24 +0100
Subject: [PATCH 7/8] [BOLT][Perf2bolt] Add support to read and parse
pre-aggregated profile
This PR implements the functionality to read and parse a pre-parsed
perf-script profile which was generated by Perf2bolt's
'--generate-perf-text-data' option.
It helps to add support for large ARM Spe end-to-end tests.
Why does the test need to have a textual format Spe profile?
- To collect an Arm SPE profile with Linux Perf, you need
an Arm development device which has SPE support.
- To decode SPE data, you also need a suitable version of
Linux Perf.
The minimum required version of Linux Perf is v6.15.
To bypass these technical difficulties, it is easier to provide
a pre-generated profile in textual format.
How can this type of profile be generated?
1) You can use Perf2bolt itself to generate a pre-parsed perf-script profile
in textual format.
$ perf2bolt BINARY -p perf.data -o test.text --spe --generate-perf-text-data
2) Perf2bolt is able to work with this type of profile:
$ perf2bolt BINARY -o test.fdata -p test.text --spe -perf-text-data
---
bolt/include/bolt/Profile/DataAggregator.h | 15 +++
bolt/lib/Profile/DataAggregator.cpp | 129 +++++++++++++++++++--
bolt/test/perf2bolt/Inputs/perf_test | Bin 0 -> 142568 bytes
3 files changed, 133 insertions(+), 11 deletions(-)
create mode 100755 bolt/test/perf2bolt/Inputs/perf_test
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index bda3ec5099fca..efeefc87d9f94 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -171,6 +171,8 @@ class DataAggregator : public DataReader {
sys::ProcessInfo PI{};
SmallVector<char, 256> StdoutPath{};
SmallVector<char, 256> StderrPath{};
+ uint64_t Length{0};
+ uint64_t Offset{0};
};
/// Process info for spawned processes
@@ -464,6 +466,19 @@ class DataAggregator : public DataReader {
/// an external tool.
std::error_code parsePreAggregatedLBRSamples();
+ /// Coordinate reading and parsing pre-parsed perf-script trace created by
+ /// Perf2bolt's '--generate-perf-text-data' option.
+ ///
+ /// We process the special header of the pre-parsed profile first to
+ /// determine an offset/length pair for each event.
+ /// Later, based on this information, we open only the slice of the
+ /// pre-parsed file which belongs to the required event. After the
+ /// preparation, the parsePerfData function is invoked.
+ void parsePerfTextData(BinaryContext &BC);
+
+ /// Parse the header of the perf text file.
+ std::error_code parsePerfTextFileHeader();
+
/// Dump pre-parsed perf profile data into a single file.
/// The generator relies on the aggregator work to spawn the required
/// perf-script jobs based on the the aggregation type, and merges
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 1d84911993e37..6d51e8e80792d 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -121,11 +121,11 @@ cl::opt<bool> ReadPreAggregated(
"pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
cl::cat(AggregatorCategory));
-cl::opt<std::string>
- ReadPerfEvents("perf-script-events",
- cl::desc("skip perf event collection by supplying a "
- "perf-script output in a textual format"),
- cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory));
+cl::opt<bool> ReadPerfTextData(
+ "perf-text-data",
+ cl::desc("skip perf event collection by reading a "
+ "pre-parsed perf-script output in a textual format"),
+ cl::Hidden, cl::cat(AggregatorCategory));
cl::opt<bool> GeneratePerfTextProfile(
"generate-perf-text-data",
@@ -212,7 +212,7 @@ void DataAggregator::start() {
// Don't launch perf for pre-aggregated files or when perf input is specified
// by the user.
- if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty())
+ if (opts::ReadPreAggregated || opts::ReadPerfTextData)
return;
findPerfExecutable();
@@ -355,7 +355,7 @@ void DataAggregator::processFileBuildID(StringRef FileBuildID) {
}
bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
- if (opts::ReadPreAggregated)
+ if (opts::ReadPreAggregated || opts::ReadPerfTextData)
return true;
Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
@@ -401,6 +401,97 @@ void DataAggregator::parsePreAggregated() {
}
}
+std::error_code DataAggregator::parsePerfTextFileHeader() {
+ size_t LineEnd = ParsingBuf.find_first_of("\n");
+ if (LineEnd == StringRef::npos) {
+ reportError("expected rest of line");
+ Diag << "Found: " << ParsingBuf << "\n";
+ return make_error_code(llvm::errc::io_error);
+ }
+ StringRef HeaderLine = ParsingBuf.substr(0, LineEnd);
+ size_t HeaderLineSize = HeaderLine.size() + 1;
+
+ if (!HeaderLine.consume_front(PerfTextMagicStr)) {
+ reportError("expected 'PERFTEXT' magic string");
+ Diag << "Found: " << HeaderLine << "\n";
+ return make_error_code(llvm::errc::io_error);
+ }
+ Col += PerfTextMagicStr.size();
+
+ SmallVector<StringRef, 5> Events;
+ HeaderLine.trim().split(Events, ";", -1, false);
+
+ if (Events.empty()) {
+ reportError("missing events=sizes content");
+ Diag << "Found: " << HeaderLine << "\n";
+ return make_error_code(llvm::errc::io_error);
+ }
+
+ uint64_t Offset = HeaderLineSize;
+ uint64_t Length = 0;
+ for (StringRef EV : Events) {
+ StringRef EventStr, LengthStr;
+ std::tie(EventStr, LengthStr) = EV.split("=");
+
+ PerfProcessInfo *PPI =
+ StringSwitch<PerfProcessInfo *>(EventStr)
+ .Case(PerfProcessInfo::BuildIDEventStr, &BuildIDProcessInfo)
+ .Case(PerfProcessInfo::MainEventStr, &MainEventsPPI)
+ .Case(PerfProcessInfo::MemEventStr, &MemEventsPPI)
+ .Case(PerfProcessInfo::MMapEventStr, &MMapEventsPPI)
+ .Case(PerfProcessInfo::TaskEventsStr, &TaskEventsPPI)
+ .Default(nullptr);
+
+ if (!PPI) {
+ reportError("malformed text profile");
+ Diag << "Found: " << EventStr << " in " << HeaderLine << "\n";
+ return make_error_code(llvm::errc::io_error);
+ }
+
+ if (LengthStr.getAsInteger(16, Length)) {
+ reportError("expected hexadecimal number");
+ Diag << "Found: " << LengthStr << " in " << HeaderLine << "\n";
+ return make_error_code(llvm::errc::io_error);
+ }
+ PPI->Offset = Offset;
+ PPI->Length = Length;
+ Offset = Offset + Length;
+ Col += EV.size();
+ }
+ uint64_t FS = getFileSize(Filename);
+ if (FS != Offset) {
+ reportError("corrupted perf text profile");
+ Diag << "Found: " << FS << " != " << Offset << "\n";
+ return make_error_code(llvm::errc::io_error);
+ }
+ return std::error_code();
+}
+
+void DataAggregator::parsePerfTextData(BinaryContext &BC) {
+ outs() << "PERF2BOLT: parsing a hybrid perf-script events...\n";
+ NamedRegionTimer T("parsePerfTextData", "Parsing perf-script events",
+ TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+ MemoryBuffer::getFileOrSTDIN(Filename);
+ if (std::error_code EC = MB.getError()) {
+ errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
+ << EC.message() << "\n";
+ exit(1);
+ }
+
+ ParsingBuf = (*MB)->getBuffer();
+ Col = 0;
+ Line = 1;
+ if (std::error_code EC = parsePerfTextFileHeader()) {
+ errs() << "PERF2BOLT-ERROR: failed to parse text header" << EC.message()
+ << "\n";
+ exit(1);
+ }
+
+ parsePerfData(BC);
+}
+
void DataAggregator::generatePerfTextData() {
std::error_code EC;
raw_fd_ostream OutFile(opts::OutputFilename, EC, sys::fs::OpenFlags::OF_None);
@@ -489,10 +580,24 @@ void DataAggregator::filterBinaryMMapInfo() {
int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
PerfProcessErrorCallbackTy Callback) {
- if (!opts::ReadPerfEvents.empty()) {
- outs() << "PERF2BOLT: using pre-processed perf events for '" << Name
- << "' (perf-script-events)\n";
- ParsingBuf = opts::ReadPerfEvents;
+ if (opts::ReadPerfTextData) {
+ if (Process.Length == 0) {
+ errs() << "PERF2BOLT-WARNING: your input profile was generated with "
+ << "parsing " << Process.Type << " event enabled. "
+ << "This data is missing from your pre-parsed profile.\n";
+ }
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+ MemoryBuffer::getFileSlice(Filename, Process.Length, Process.Offset);
+ if (std::error_code EC = MB.getError()) {
+ errs() << "Cannot open " << Process.Type << ": " << EC.message() << "\n";
+ exit(1);
+ }
+
+ FileBuf = std::move(*MB);
+ ParsingBuf = FileBuf->getBuffer();
+ Col = 0;
+ Line = 1;
return 0;
}
@@ -676,6 +781,8 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
generatePerfTextData();
} else if (opts::ReadPreAggregated) {
parsePreAggregated();
+ } else if (opts::ReadPerfTextData) {
+ parsePerfTextData(BC);
} else {
parsePerfData(BC);
}
diff --git a/bolt/test/perf2bolt/Inputs/perf_test b/bolt/test/perf2bolt/Inputs/perf_test
new file mode 100755
index 0000000000000000000000000000000000000000..50d930355a5c0d88306f5bb71b091018e8e5910b
GIT binary patch
literal 142568
zcmeI$TWnm#8Nl(`bpinbHX#8MOtOMf8PP23E9Me#E}LwG9n#>ar4l_^@2>4_*Sqds
zsDtV at B@|&?q_nC~E4302JU~^cJWz>;sx=KFAo>uhs-o>nRDl!(t=u*(Zb{>QbIyGB
z$+0cyP3pt{MC&>8&3rR+emT2mcE6Gy*b|S%j2=zqZeweQTbuk)ziSw~DelgU={J4m
zGWom3w3&+}x<+38t~FYYxil@;xqfzZoA<11kCtt&pO(5pTiU$Uj?C4;fLfZCpr_4S
zE3d<?LAzd`mi=xWT88uNH744E4vDs4b}ip at 3)3>1*SaOhYu#^WH%7~pUo)n|m6~UJ
z+xb#%zLc9!%ctFZTB_&4x9fSkvX8n>H;g&p^6uIctaDlE)_0dH)&7%`XV>|!9yZ^;
zUJj;PRwmqlTIzhamWso at -M5zV+e*c9V{+SMU+=cw?qsc!?DWU8|IPNA_U=FAk6`aF
zH-%s44+01vfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*sr
zAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_0
z00IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b@
z2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000Iag
zfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}
z0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*sr
zAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_0
z00IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b@
z2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000Iag
zfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}
z0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*sr
zAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2q1s}0tg_0
z00IagfB*srAb<b at 2q1s}0tg_000IagfB*srAb<b at 2p|v>f(<J$TqUu at 3h^6OiQm0S
z{L>P*Im?f at iTVGU<Zs-JN{1HLsj<0ui7`#)3oiD;H=er4NS~?DAa>mnlw4yrhvNRJ
z=aq`XIj>gFR_oq)wpccvH#%M^yNGAJeM5s at zECZU7HjoFb!c!`sZuTsWrs_Jpl#(A
zFE^R>MvCQZsrYcg*g2B5O0w7N9oV;Xm)Du>O!h3EcXj(qNwlr;%XPZOYn%MD_DH<n
z#S{D2UeI6br>+mi(QDS>-VDX#8kF*MC?3ohG at c2?gAs%HTqqu_Ac*(7^;|5~aNPc%
z#jb0*vrpo7O&@EDnIS1Ru8Em3DZ8b7T*@ISpOvyM<*QOYBjp(>H_FQ%v4;<u*knuW
zy7d>YJ1+m3_8w at H*Q at cE`MB+{`{EW`Ps!^UnZv(VcU;l(SbW#z7Y$01#BKY54{4up
z?e at Mln!kT3|F~;!54Y!*+Mk#B3o?Hx+#VkdI at lNo0xJmoxp~XqJAS`!`i;+Krhk_?
z_SW2ip>)S=$7`7<w`b*cU)Xf-+L$@}^Tk8r(>-yMd2;O|wzz%1z9}=awpTi3zBXIG
zWZ`u4)}wZs6Rwp05BU9i4%*`8^O@<n%u9c`C-c&LQzrIG=8g09jWWPL<ZWjMShsL`
zq<QP^rF^@-quaMju4&wID06K4Z=|QmOut=UpPAWyzB_Kt6eVybzp$|I%GyoRFZQ6!
zy*R(kf9H$R=&s<9q)tzrn{Ld{q|Y^_=N~*VVvl6=)<=Dv3)*k_koM*uYKKVv{B+~&
zv8l7BammT_IXmvVm(HZ;zw`do+3EB-nb<CIX}-*{_gY-Plj->u>3%YO)_!oDOwY-+
znXgXG_3vypQ at 71b&CfK>p6Qdzai at bzr^8F9_IhX1bH`?5)9JZ)XFFyy$6juE at PxIH
zxz>sIlzdcJ at A$$gX<K)~EVlVInbaK%J>Tz&n}wTxAb*>;_RE>AkJ;9DPRlgfkMjRR
zz}_$Ru=^Eze6X-k3C at hYqs%5((ZW7A?QwWx at O1V&NqyD^<&cezf3&*tZ*Dw&W?$pt
ziA_PiKI65&_G@>~X??Ehy!P2_k24#B%*%qR(R_WT|MIG!{ZiNdxGVp1KDfRYUHi2z
zuKSnkTwSkA_hP#5vCn3EX#4u0Y6dn3E5F>;$6S5RUH_z;@4Tx&>gw{A at V)i_!KMGt
zi<H~1MJIa7t=kL~%bzb)???;`>`rucCR52&qJ6Yj at 0ZtDV>p?sjNefzJv4r+{j+VN
zTKQZdS5Hb~qPNi3)6+51aoccbZ%3-*_EgvHy<Od1xm at 2!N7ryqXJ0<o)78<NzuDwU
z+45-Op+dD*td!j(c7*@-JbQQTx+BqkXt+_XHxeCP$*yE-TW`a!c7D0DJDKXf*@dGT
zOLhDIWA}(JyBjv)n(Y0z#{6;F=i1eN|ErDecU^aLqnyOe<>q*+-swS$-FMpfY$SfK
ziyOC}4tg7tm6Mpc)@0oKMDL$*KYpdr`&IjYOHS<gx-ZxG({f^Wp?d#o{JSz>EPl<h
z_5V~ltaQDvuM+>2KmK|%>E7=;{%8F0m-~ERJH%U!zW=rVFYE=zuQJj5cE7~q at hcYJ
zf7*Y at k2f2AA8Pz9IoTi%hn=&(Czsf}E`HiM`}<;v&B-evpMtH<S*reVrHp^2xzzNz
z>ssUD35l=t{j*czEh~(3kHix$H(ZRY5`Scs_;V7!Av}OPe^KK0dJnkk(zX5A#U~>1
zpSk#P7x%wk$XKtt`13B_u5%@eW%<TAVUp!ay^!3obKkalcGM*EN6NJ$<0d&;ZY141
z{DO*C4XR_=+E~!LE|F at Xl(nhuY@$?`vr0Z&&n9!Vdbddy#=MbgcD&$?<)ypUOtM~>
zl&-~cQU1yo+M-GNx%Sk{R;$?~L8G3_6xs1&PL@!S1UtSAG4AE;w`Q;Ie{J at oc0tKf
zC6_JvU4ub~Yc*p(|HcbtNtUaW1S!XA^=fbyl=2-tJ6x2}Moq3-uhko}Ys^VdzMOjl
z`|cm|JhSwT-P74?M!%|uLY|kec%!AtaJJ;->y>KF%QhyBEMcNls2B3dlnlE<5Bt^L
zUA`x4tsXIfUA{3sene72HP6xoI<D(Pp*rH#3$?l|`hxZEIk;mm?WOncmJ0}efH3|~
z5?0aGy}bkX?c6co-M44Y{plfZXvfZhwEdCdK55Sv%U+{a$jki8f7A$%5&4C~F3|W_
z<P6;k{>@)L^0kULmM!P)4<r}<43f>~&3LigsG0Fbsa_OflP^A0%uC1J5A5GDxNn!<
z`u|BOZzuWOwV%ns at vJ-1Qui6I+3&08wzT^!yC>3oeJ*L4khbulpHCXI`!M_L(tO=d
zX=#5yu!o()u2c6ry3g7wF}ruseBIw at srw>*%4t6>KOy;cFQj$d4{4c_c6;dj(e>Xc
zZGR2r>wZp4_0#!v{r;fxCoM1Art|B5QOn1*qbsdJ_!yLmU6bbPept&{*Eu at B`hCjf
z?{xiiKdhzpv-$R1*RS!c#O#{%^IV at pT1LIJu4UQf>-z6<&#}c)+QWzDY3xgKV&~U<
zeZFgXN;|qz*RSEn<izIdeqNu)hg*XY-?NROqa}21Proj&aK7%p2Q~-!x{uei==#4I
z$=7|tn9J9F{A&5%j^rP8`{tuAKcR`PjIRHC5^t85-XFSOOSA>+k9uic%jYEbicr4p
z-`YQ7{*NTr+UxwfuS&W6r=7ok at 6vhoI({O#wu|QL at 42V057yr<ZQ(=nHFiQy!uh(t
zYTXj#NB_Uvax~q%7RlFrcEaT^c62Av`G5T(`O*H-`a6+)-LJO0`4etlEu#5<k_H=7
zKi$V?Tz>R=qxJVC*Tz!v3Llzp+rx+EYiW}M>2NP&`nLwrlv~*K(rmwj>-zQcXPxBh
h+QTDE+z{k%UYsasja1EsRq~&>G06XIBq35Y{{#`pM9BaE
literal 0
HcmV?d00001
>From 1ecc29141aa588983454c0031e42012e63554d2d Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Tue, 27 Jan 2026 09:56:02 +0100
Subject: [PATCH 8/8] Update PerfSpeEvent unittest
---
bolt/unittests/Profile/PerfSpeEvents.cpp | 30 +++++++++++++-----------
1 file changed, 16 insertions(+), 14 deletions(-)
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 4f060cd0aa7c8..b736c38f691c8 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -22,7 +22,7 @@ using namespace llvm::object;
using namespace llvm::ELF;
namespace opts {
-extern cl::opt<std::string> ReadPerfEvents;
+extern cl::opt<bool> ReadPerfTextData;
extern cl::opt<bool> ArmSPE;
} // namespace opts
@@ -92,10 +92,10 @@ struct PerfSpeEventsTestHelper : public testing::Test {
/// Parse and check SPE brstack as LBR.
void parseAndCheckBrstackEvents(
- uint64_t PID,
+ uint64_t PID, StringRef &Buffer,
const std::vector<std::pair<Trace, TakenBranchInfo>> &ExpectedSamples) {
DataAggregator DA("<pseudo input>");
- DA.ParsingBuf = opts::ReadPerfEvents;
+ DA.ParsingBuf = Buffer;
DA.BC = BC.get();
DataAggregator::MMapInfo MMap;
DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
@@ -134,14 +134,15 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
// ```
opts::ArmSPE = true;
- opts::ReadPerfEvents = " 1234 0xa001/0xa002/PN/-/-/10/COND/-\n"
- " 1234 0xb001/0xb002/P/-/-/4/RET/-\n"
- " 1234 0xc456/0xc789/P/-/-/13/-/-\n"
- " 1234 0xd123/0xd456/M/-/-/7/RET/-\n"
- " 1234 0xe001/0xe002/P/-/-/14/RET/-\n"
- " 1234 0xd123/0xd456/M/-/-/7/RET/-\n"
- " 1234 0xf001/0xf002/MN/-/-/8/COND/-\n"
- " 1234 0xc456/0xc789/M/-/-/13/-/-\n";
+ opts::ReadPerfTextData = true;
+ StringRef Buffer = " 1234 0xa001/0xa002/PN/-/-/10/COND/-\n"
+ " 1234 0xb001/0xb002/P/-/-/4/RET/-\n"
+ " 1234 0xc456/0xc789/P/-/-/13/-/-\n"
+ " 1234 0xd123/0xd456/M/-/-/7/RET/-\n"
+ " 1234 0xe001/0xe002/P/-/-/14/RET/-\n"
+ " 1234 0xd123/0xd456/M/-/-/7/RET/-\n"
+ " 1234 0xf001/0xf002/MN/-/-/8/COND/-\n"
+ " 1234 0xc456/0xc789/M/-/-/13/-/-\n";
// ExpectedSamples contains the aggregated information about
// a branch {{Branch From, To}, {TakenCount, MispredCount}}.
@@ -158,7 +159,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
{{0xe001, 0xe002, Trace::BR_ONLY}, {1, 0}},
{{0xf001, 0xf002, Trace::BR_ONLY}, {1, 1}}};
- parseAndCheckBrstackEvents(1234, ExpectedSamples);
+ parseAndCheckBrstackEvents(1234, Buffer, ExpectedSamples);
}
TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstackAndPbt) {
@@ -174,7 +175,8 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstackAndPbt) {
// ```
opts::ArmSPE = true;
- opts::ReadPerfEvents =
+ opts::ReadPerfTextData = true;
+ StringRef Buffer =
// "<PID> <SRC>/<DEST>/PN/-/-/10/COND/- <NULL>/<PBT>/-/-/-/0//-\n"
" 4567 0xa002/0xa003/PN/-/-/10/COND/- 0x0/0xa001/-/-/-/0//-\n"
" 4567 0xb002/0xb003/P/-/-/4/RET/- 0x0/0xb001/-/-/-/0//-\n"
@@ -246,7 +248,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstackAndPbt) {
{{0xf002, 0xf003, Trace::BR_ONLY}, {1, 1}},
{{0x0, 0xf001, 0xf002}, {1, 0}}};
- parseAndCheckBrstackEvents(4567, ExpectedSamples);
+ parseAndCheckBrstackEvents(4567, Buffer, ExpectedSamples);
}
#endif
More information about the llvm-commits
mailing list