[llvm] Add initial support for SPE brstack format (PR #129231)

Ádám Kallai via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 16 07:47:39 PDT 2025


https://github.com/kaadam updated https://github.com/llvm/llvm-project/pull/129231

>From 0786cae364e35c2aa42e35065945347815a72e97 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <paschalis.mpeis at arm.com>
Date: Fri, 20 Dec 2024 14:19:01 +0000
Subject: [PATCH 01/16] [BOLT][AArch64] Introduce SPE mode in BasicAggregation

BOLT gains the ability to process branch target information generated by
Arm SPE data, using the `BasicAggregation` format.

Example usage is:
```bash
perf2bolt -p perf.data -o perf.boltdata --nl --spe BINARY
```

New branch data and compatibility:
---
SPE branch entries in perf data contain a branch pair (`IP` -> `ADDR`)
for the source and destination branches. DataAggregator processes those
by creating two basic samples. Any other event types will have `ADDR`
field set to `0x0`. For those a single sample will be created. Such
events can be either SPE or non-SPE, like `l1d-access` and `cycles`
respectively.

The format of the input perf entries is:
```
PID   EVENT-TYPE   ADDR   IP
```

When in SPE mode and:
- host is not `AArch64`, BOLT will exit with a relevant message
- `ADDR` field is unavailable, BOLT will exit with a relevant message
- no branch pairs were recorded, BOLT will present a warning

Examples of generating profiling data for the SPE mode:
---
Profiles can be captured with perf on AArch64 machines with SPE enabled.
They can be combined with other events, SPE or not.

Capture only SPE branch data events:
```bash
perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
```

Capture any SPE events:
```bash
perf record -e 'arm_spe_0//u' -- BINARY
```

Capture any SPE events and cycles:
```bash
perf record -e 'arm_spe_0//u' -e cycles:u -- BINARY
```

Use more filters, jitter, and a specific sample count to control overhead/quality:
```bash
perf record -e 'arm_spe_0/branch_filter=1,load_filter=0,store_filter=0,jitter=1/u' -c 10007 -- BINARY
```
---
 bolt/include/bolt/Profile/DataAggregator.h    |  14 ++
 bolt/lib/Profile/DataAggregator.cpp           | 138 +++++++++++++-
 .../test/perf2bolt/AArch64/perf2bolt-spe.test |  14 ++
 bolt/test/perf2bolt/X86/perf2bolt-spe.test    |   9 +
 bolt/tools/driver/llvm-bolt.cpp               |   9 +
 bolt/unittests/Profile/CMakeLists.txt         |  14 ++
 bolt/unittests/Profile/PerfSpeEvents.cpp      | 173 ++++++++++++++++++
 7 files changed, 363 insertions(+), 8 deletions(-)
 create mode 100644 bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
 create mode 100644 bolt/test/perf2bolt/X86/perf2bolt-spe.test
 create mode 100644 bolt/unittests/Profile/PerfSpeEvents.cpp

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 3f07a6dc03a4f..e2f72a579cb75 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -85,6 +85,8 @@ class DataAggregator : public DataReader {
   };
   friend raw_ostream &operator<<(raw_ostream &OS, const LBREntry &);
 
+  friend struct PerfSpeEventsTestHelper;
+
   struct PerfBranchSample {
     SmallVector<LBREntry, 32> LBR;
   };
@@ -286,6 +288,15 @@ class DataAggregator : public DataReader {
   /// and a PC
   ErrorOr<PerfBasicSample> parseBasicSample();
 
+  /// Parse an Arm SPE entry into the non-lbr format by generating two basic
+  /// samples. The format of an input SPE entry is:
+  /// ```
+  /// PID   EVENT-TYPE   ADDR   IP
+  /// ```
+  /// SPE branch events will have 'ADDR' set to a branch target address while
+  /// other perf or SPE events will have it set to zero.
+  ErrorOr<std::pair<PerfBasicSample,PerfBasicSample>> parseSpeAsBasicSamples();
+
   /// Parse a single perf sample containing a PID associated with an IP and
   /// address.
   ErrorOr<PerfMemSample> parseMemSample();
@@ -332,6 +343,9 @@ class DataAggregator : public DataReader {
   /// Process non-LBR events.
   void processBasicEvents();
 
+  /// Parse Arm SPE events into the non-LBR format.
+  std::error_code parseSpeAsBasicEvents();
+
   /// Parse the full output generated by perf script to report memory events.
   std::error_code parseMemEvents();
 
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index ade8478f556e9..cb66f92245154 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -49,6 +49,13 @@ static cl::opt<bool>
                      cl::desc("aggregate basic samples (without LBR info)"),
                      cl::cat(AggregatorCategory));
 
+cl::opt<bool> ArmSPE(
+    "spe",
+    cl::desc(
+        "Enable Arm SPE mode. Used in conjuction with no-lbr mode, ie `--spe "
+        "--nl`"),
+    cl::cat(AggregatorCategory));
+
 static cl::opt<std::string>
     ITraceAggregation("itrace",
                       cl::desc("Generate LBR info with perf itrace argument"),
@@ -181,11 +188,19 @@ void DataAggregator::start() {
 
   findPerfExecutable();
 
-  if (opts::BasicAggregation) {
-    launchPerfProcess("events without LBR",
-                      MainEventsPPI,
+  if (opts::ArmSPE) {
+    if (!opts::BasicAggregation) {
+      errs() << "PERF2BOLT-ERROR: Arm SPE mode is combined only with "
+                "BasicAggregation.\n";
+      exit(1);
+    }
+    launchPerfProcess("branch events with SPE", MainEventsPPI,
+                      "script -F pid,event,ip,addr --itrace=i1i",
+                      /*Wait = */ false);
+  } else if (opts::BasicAggregation) {
+    launchPerfProcess("events without LBR", MainEventsPPI,
                       "script -F pid,event,ip",
-                      /*Wait = */false);
+                      /*Wait = */ false);
   } else if (!opts::ITraceAggregation.empty()) {
     // Disable parsing memory profile from trace data, unless requested by user.
     if (!opts::ParseMemProfile.getNumOccurrences())
@@ -456,14 +471,20 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   this->BC = &BC;
 
-  auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
+  const Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
+                     "Cannot print 'addr' field.");
+
+  auto ErrorCallback = [&NoData](int ReturnCode, StringRef ErrBuf) {
+    if (opts::ArmSPE && NoData.match(ErrBuf)) {
+      errs() << "PERF2BOLT-ERROR: perf data are incompatible for Arm SPE mode "
+                "consumption. ADDR attribute is unset.\n";
+      exit(1);
+    }
     errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
     exit(1);
   };
 
   auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
-    Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
-                 "Cannot print 'addr' field.");
     if (!NoData.match(ErrBuf))
       ErrorCallback(ReturnCode, ErrBuf);
   };
@@ -509,7 +530,8 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   filterBinaryMMapInfo();
   prepareToParse("events", MainEventsPPI, ErrorCallback);
 
-  if ((!opts::BasicAggregation && parseBranchEvents()) ||
+  if (((!opts::BasicAggregation && !opts::ArmSPE) && parseBranchEvents()) ||
+      (opts::BasicAggregation && opts::ArmSPE && parseSpeAsBasicEvents()) ||
       (opts::BasicAggregation && parseBasicEvents()))
     errs() << "PERF2BOLT: failed to parse samples\n";
 
@@ -1137,6 +1159,66 @@ ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
   return PerfBasicSample{Event.get(), Address};
 }
 
+ErrorOr<
+    std::pair<DataAggregator::PerfBasicSample, DataAggregator::PerfBasicSample>>
+DataAggregator::parseSpeAsBasicSamples() {
+  while (checkAndConsumeFS()) {
+  }
+
+  ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
+  if (std::error_code EC = PIDRes.getError())
+    return EC;
+
+  constexpr PerfBasicSample EmptySample = PerfBasicSample{StringRef(), 0};
+  auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
+  if (MMapInfoIter == BinaryMMapInfo.end()) {
+    consumeRestOfLine();
+    return std::make_pair(EmptySample, EmptySample);
+  }
+
+  while (checkAndConsumeFS()) {
+  }
+
+  ErrorOr<StringRef> Event = parseString(FieldSeparator);
+  if (std::error_code EC = Event.getError())
+    return EC;
+
+  while (checkAndConsumeFS()) {
+  }
+
+  ErrorOr<uint64_t> AddrResTo = parseHexField(FieldSeparator);
+  if (std::error_code EC = AddrResTo.getError())
+    return EC;
+  consumeAllRemainingFS();
+
+  ErrorOr<uint64_t> AddrResFrom = parseHexField(FieldSeparator, true);
+  if (std::error_code EC = AddrResFrom.getError())
+    return EC;
+
+  if (!checkAndConsumeNewLine()) {
+    reportError("expected end of line");
+    return make_error_code(llvm::errc::io_error);
+  }
+
+  auto genBasicSample = [&](uint64_t Address) {
+    // When fed with non SPE branch events the target address will be null.
+    // This is expected and ignored.
+    if (Address == 0x0)
+      return EmptySample;
+
+    if (!BC->HasFixedLoadAddress)
+      adjustAddress(Address, MMapInfoIter->second);
+    return PerfBasicSample{Event.get(), Address};
+  };
+
+  // Show more meaningful event names on boltdata.
+  if (Event->str() == "instructions:")
+    Event = *AddrResTo != 0x0 ? "branch-spe:" : "instruction-spe:";
+
+  return std::make_pair(genBasicSample(*AddrResFrom),
+                        genBasicSample(*AddrResTo));
+}
+
 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
   PerfMemSample Res{0, 0};
 
@@ -1636,6 +1718,46 @@ std::error_code DataAggregator::parseBasicEvents() {
   return std::error_code();
 }
 
+std::error_code DataAggregator::parseSpeAsBasicEvents() {
+  outs() << "PERF2BOLT: parsing SPE data as basic events (no LBR)...\n";
+  NamedRegionTimer T("parseSPEBasic", "Parsing SPE as basic events",
+                     TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
+  uint64_t NumSpeBranchSamples = 0;
+
+  // Convert entries to one or two basic samples, depending on whether there is
+  // branch target information.
+  while (hasData()) {
+    auto SamplePair = parseSpeAsBasicSamples();
+    if (std::error_code EC = SamplePair.getError())
+      return EC;
+
+    auto registerSample = [this](const PerfBasicSample *Sample) {
+      if (!Sample->PC)
+        return;
+
+      if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
+        BF->setHasProfileAvailable();
+
+      ++BasicSamples[Sample->PC];
+      EventNames.insert(Sample->EventName);
+    };
+
+    if (SamplePair->first.PC != 0x0 && SamplePair->second.PC != 0x0)
+      ++NumSpeBranchSamples;
+
+    registerSample(&SamplePair->first);
+    registerSample(&SamplePair->second);
+  }
+
+  if (NumSpeBranchSamples == 0)
+    errs() << "PERF2BOLT-WARNING: no SPE branches found\n";
+  else
+    outs() << "PERF2BOLT: found " << NumSpeBranchSamples
+           << " SPE branch sample pairs.\n";
+
+  return std::error_code();
+}
+
 void DataAggregator::processBasicEvents() {
   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
new file mode 100644
index 0000000000000..d7cea7ff769b8
--- /dev/null
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -0,0 +1,14 @@
+## Check that Arm SPE mode is available on AArch64 with BasicAggregation.
+
+REQUIRES: system-linux,perf,target=aarch64{{.*}}
+
+RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
+RUN: touch %t.empty.perf.data
+RUN: perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
+
+CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job
+
+RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe
+RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-LBR
+
+CHECK-SPE-LBR: PERF2BOLT-ERROR: Arm SPE mode is combined only with BasicAggregation.
diff --git a/bolt/test/perf2bolt/X86/perf2bolt-spe.test b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
new file mode 100644
index 0000000000000..f31c17f411137
--- /dev/null
+++ b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
@@ -0,0 +1,9 @@
+## Check that Arm SPE mode is unavailable on X86.
+
+REQUIRES: system-linux,x86_64-linux
+
+RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
+RUN: touch %t.empty.perf.data
+RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s
+
+CHECK: BOLT-ERROR: -spe is available only on AArch64.
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index b9836c2397b6b..66ccc8d0b65f4 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -51,6 +51,8 @@ static cl::opt<std::string> InputFilename(cl::Positional,
                                           cl::Required, cl::cat(BoltCategory),
                                           cl::sub(cl::SubCommand::getAll()));
 
+extern cl::opt<bool> ArmSPE;
+
 static cl::opt<std::string>
 InputDataFilename("data",
   cl::desc("<data file>"),
@@ -237,6 +239,13 @@ int main(int argc, char **argv) {
       if (Error E = RIOrErr.takeError())
         report_error(opts::InputFilename, std::move(E));
       RewriteInstance &RI = *RIOrErr.get();
+
+      if (opts::AggregateOnly && !RI.getBinaryContext().isAArch64() &&
+          opts::ArmSPE == 1) {
+        errs() << "BOLT-ERROR: -spe is available only on AArch64.\n";
+        exit(1);
+      }
+
       if (!opts::PerfData.empty()) {
         if (!opts::AggregateOnly) {
           errs() << ToolName
diff --git a/bolt/unittests/Profile/CMakeLists.txt b/bolt/unittests/Profile/CMakeLists.txt
index e0aa0926b49c0..ce01c6c4b949e 100644
--- a/bolt/unittests/Profile/CMakeLists.txt
+++ b/bolt/unittests/Profile/CMakeLists.txt
@@ -1,11 +1,25 @@
+set(LLVM_LINK_COMPONENTS
+  DebugInfoDWARF
+  Object
+  ${LLVM_TARGETS_TO_BUILD}
+  )
+
 add_bolt_unittest(ProfileTests
   DataAggregator.cpp
+  PerfSpeEvents.cpp
 
   DISABLE_LLVM_LINK_LLVM_DYLIB
   )
 
 target_link_libraries(ProfileTests
   PRIVATE
+  LLVMBOLTCore
   LLVMBOLTProfile
+  LLVMTargetParser
+  LLVMTestingSupport
   )
 
+foreach (tgt ${BOLT_TARGETS_TO_BUILD})
+  string(TOUPPER "${tgt}" upper)
+  target_compile_definitions(ProfileTests PRIVATE "${upper}_AVAILABLE")
+endforeach()
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
new file mode 100644
index 0000000000000..807a3bb1e07f4
--- /dev/null
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -0,0 +1,173 @@
+//===- bolt/unittests/Profile/PerfSpeEvents.cpp ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef AARCH64_AVAILABLE
+
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Profile/DataAggregator.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::bolt;
+using namespace llvm::object;
+using namespace llvm::ELF;
+
+namespace opts {
+extern cl::opt<std::string> ReadPerfEvents;
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+/// Perform checks on perf SPE branch events combined with other SPE or perf
+/// events.
+struct PerfSpeEventsTestHelper : public testing::Test {
+  void SetUp() override {
+    initalizeLLVM();
+    prepareElf();
+    initializeBOLT();
+  }
+
+protected:
+  void initalizeLLVM() {
+    llvm::InitializeAllTargetInfos();
+    llvm::InitializeAllTargetMCs();
+    llvm::InitializeAllAsmParsers();
+    llvm::InitializeAllDisassemblers();
+    llvm::InitializeAllTargets();
+    llvm::InitializeAllAsmPrinters();
+  }
+
+  void prepareElf() {
+    memcpy(ElfBuf, "\177ELF", 4);
+    ELF64LE::Ehdr *EHdr = reinterpret_cast<typename ELF64LE::Ehdr *>(ElfBuf);
+    EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64;
+    EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB;
+    EHdr->e_machine = llvm::ELF::EM_AARCH64;
+    MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF");
+    ObjFile = cantFail(ObjectFile::createObjectFile(Source));
+  }
+
+  void initializeBOLT() {
+    Relocation::Arch = ObjFile->makeTriple().getArch();
+    BC = cantFail(BinaryContext::createBinaryContext(
+        ObjFile->makeTriple(), std::make_shared<orc::SymbolStringPool>(),
+        ObjFile->getFileName(), nullptr, /*IsPIC*/ false,
+        DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()}));
+    ASSERT_FALSE(!BC);
+  }
+
+  char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {};
+  std::unique_ptr<ObjectFile> ObjFile;
+  std::unique_ptr<BinaryContext> BC;
+
+  /// Return true when the expected \p SampleSize profile data are generated and
+  /// contain all the \p ExpectedEventNames.
+  bool checkEvents(uint64_t PID, size_t SampleSize,
+                   const StringSet<> &ExpectedEventNames) {
+    DataAggregator DA("<pseudo input>");
+    DA.ParsingBuf = opts::ReadPerfEvents;
+    DA.BC = BC.get();
+    DataAggregator::MMapInfo MMap;
+    DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
+
+    DA.parseSpeAsBasicEvents();
+
+    for (auto &EE : ExpectedEventNames)
+      if (!DA.EventNames.contains(EE.first()))
+        return false;
+
+    return SampleSize == DA.BasicSamples.size();
+  }
+};
+
+} // namespace bolt
+} // namespace llvm
+
+// Check that DataAggregator can parseSpeAsBasicEvents for branch events when
+// combined with other event types.
+
+TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
+  // Check perf input with SPE branch events.
+  // Example collection command:
+  // ```
+  // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+  // ```
+
+  opts::ReadPerfEvents =
+      "1234          instructions:              a002    a001\n"
+      "1234          instructions:              b002    b001\n"
+      "1234          instructions:              c002    c001\n"
+      "1234          instructions:              d002    d001\n"
+      "1234          instructions:              e002    e001\n";
+
+  EXPECT_TRUE(checkEvents(1234, 10, {"branch-spe:"}));
+}
+
+TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
+  // Check perf input with SPE branch events and cycles.
+  // Example collection command:
+  // ```
+  // perf record -e cycles:u -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+  // ```
+
+  opts::ReadPerfEvents =
+      "1234          instructions:              a002    a001\n"
+      "1234              cycles:u:                 0    b001\n"
+      "1234              cycles:u:                 0    c001\n"
+      "1234          instructions:              d002    d001\n"
+      "1234          instructions:              e002    e001\n";
+
+  EXPECT_TRUE(checkEvents(1234, 8, {"branch-spe:", "cycles:u:"}));
+}
+
+TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
+  // Check perf input with any SPE event type and cycles.
+  // Example collection command:
+  // ```
+  // perf record -e cycles:u -e 'arm_spe_0//u' -- BINARY
+  // ```
+
+  opts::ReadPerfEvents =
+      "1234              cycles:u:                0     a001\n"
+      "1234              cycles:u:                0     b001\n"
+      "1234          instructions:                0     c001\n"
+      "1234          instructions:                0     d001\n"
+      "1234          instructions:              e002    e001\n";
+
+  EXPECT_TRUE(
+      checkEvents(1234, 6, {"cycles:u:", "instruction-spe:", "branch-spe:"}));
+}
+
+TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
+  // Check perf input that has no SPE branch pairs recorded.
+  // Example collection command:
+  // ```
+  // perf record -e cycles:u -e 'arm_spe_0/load_filter=1,branch_filter=0/u' --
+  // BINARY
+  // ```
+
+  testing::internal::CaptureStderr();
+  opts::ReadPerfEvents =
+      "1234          instructions:                 0    a001\n"
+      "1234              cycles:u:                 0    b001\n"
+      "1234          instructions:                 0    c001\n"
+      "1234              cycles:u:                 0    d001\n"
+      "1234          instructions:                 0    e001\n";
+
+  EXPECT_TRUE(checkEvents(1234, 5, {"instruction-spe:", "cycles:u:"}));
+
+  std::string Stderr = testing::internal::GetCapturedStderr();
+  EXPECT_EQ(Stderr, "PERF2BOLT-WARNING: no SPE branches found\n");
+}
+
+#endif

>From e574bbcdfb14bd3e01e4c09c290cea2dd38bec35 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Fri, 20 Dec 2024 15:13:40 +0000
Subject: [PATCH 02/16] clang-format fix

---
 bolt/include/bolt/Profile/DataAggregator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index e2f72a579cb75..d5110eac09ac2 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -295,7 +295,7 @@ class DataAggregator : public DataReader {
   /// ```
   /// SPE branch events will have 'ADDR' set to a branch target address while
   /// other perf or SPE events will have it set to zero.
-  ErrorOr<std::pair<PerfBasicSample,PerfBasicSample>> parseSpeAsBasicSamples();
+  ErrorOr<std::pair<PerfBasicSample, PerfBasicSample>> parseSpeAsBasicSamples();
 
   /// Parse a single perf sample containing a PID associated with an IP and
   /// address.

>From 6091d243808fb4e16a7ba7f86a954c2a4b9ff26e Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Wed, 15 Jan 2025 15:11:12 +0000
Subject: [PATCH 03/16] Addressing reviewers (1)

---
 bolt/include/bolt/Utils/CommandLineOpts.h |  1 +
 bolt/lib/Profile/DataAggregator.cpp       |  4 +++-
 bolt/tools/driver/llvm-bolt.cpp           |  4 +---
 bolt/unittests/Profile/PerfSpeEvents.cpp  | 10 +++++-----
 4 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index 4acce5a3e8320..a75b6bf720ec4 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -48,6 +48,7 @@ extern llvm::cl::OptionCategory BinaryAnalysisCategory;
 extern llvm::cl::opt<unsigned> AlignText;
 extern llvm::cl::opt<unsigned> AlignFunctions;
 extern llvm::cl::opt<bool> AggregateOnly;
+extern llvm::cl::opt<bool> ArmSPE;
 extern llvm::cl::opt<unsigned> BucketsPerLine;
 extern llvm::cl::opt<bool> CompactCodeModel;
 extern llvm::cl::opt<bool> DiffOnly;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index cb66f92245154..f1317ed3f99cf 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1189,6 +1189,7 @@ DataAggregator::parseSpeAsBasicSamples() {
   ErrorOr<uint64_t> AddrResTo = parseHexField(FieldSeparator);
   if (std::error_code EC = AddrResTo.getError())
     return EC;
+
   consumeAllRemainingFS();
 
   ErrorOr<uint64_t> AddrResFrom = parseHexField(FieldSeparator, true);
@@ -1208,12 +1209,13 @@ DataAggregator::parseSpeAsBasicSamples() {
 
     if (!BC->HasFixedLoadAddress)
       adjustAddress(Address, MMapInfoIter->second);
+
     return PerfBasicSample{Event.get(), Address};
   };
 
   // Show more meaningful event names on boltdata.
   if (Event->str() == "instructions:")
-    Event = *AddrResTo != 0x0 ? "branch-spe:" : "instruction-spe:";
+    Event = *AddrResTo != 0x0 ? "branches-spe:" : "instructions-spe:";
 
   return std::make_pair(genBasicSample(*AddrResFrom),
                         genBasicSample(*AddrResTo));
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index 66ccc8d0b65f4..2e91118c00a83 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -51,8 +51,6 @@ static cl::opt<std::string> InputFilename(cl::Positional,
                                           cl::Required, cl::cat(BoltCategory),
                                           cl::sub(cl::SubCommand::getAll()));
 
-extern cl::opt<bool> ArmSPE;
-
 static cl::opt<std::string>
 InputDataFilename("data",
   cl::desc("<data file>"),
@@ -241,7 +239,7 @@ int main(int argc, char **argv) {
       RewriteInstance &RI = *RIOrErr.get();
 
       if (opts::AggregateOnly && !RI.getBinaryContext().isAArch64() &&
-          opts::ArmSPE == 1) {
+          opts::ArmSPE) {
         errs() << "BOLT-ERROR: -spe is available only on AArch64.\n";
         exit(1);
       }
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 807a3bb1e07f4..e52393b516fa3 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -110,7 +110,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
       "1234          instructions:              d002    d001\n"
       "1234          instructions:              e002    e001\n";
 
-  EXPECT_TRUE(checkEvents(1234, 10, {"branch-spe:"}));
+  EXPECT_TRUE(checkEvents(1234, 10, {"branches-spe:"}));
 }
 
 TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
@@ -127,7 +127,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
       "1234          instructions:              d002    d001\n"
       "1234          instructions:              e002    e001\n";
 
-  EXPECT_TRUE(checkEvents(1234, 8, {"branch-spe:", "cycles:u:"}));
+  EXPECT_TRUE(checkEvents(1234, 8, {"branches-spe:", "cycles:u:"}));
 }
 
 TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
@@ -144,8 +144,8 @@ TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
       "1234          instructions:                0     d001\n"
       "1234          instructions:              e002    e001\n";
 
-  EXPECT_TRUE(
-      checkEvents(1234, 6, {"cycles:u:", "instruction-spe:", "branch-spe:"}));
+  EXPECT_TRUE(checkEvents(1234, 6,
+                          {"cycles:u:", "instructions-spe:", "branches-spe:"}));
 }
 
 TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
@@ -164,7 +164,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
       "1234              cycles:u:                 0    d001\n"
       "1234          instructions:                 0    e001\n";
 
-  EXPECT_TRUE(checkEvents(1234, 5, {"instruction-spe:", "cycles:u:"}));
+  EXPECT_TRUE(checkEvents(1234, 5, {"instructions-spe:", "cycles:u:"}));
 
   std::string Stderr = testing::internal::GetCapturedStderr();
   EXPECT_EQ(Stderr, "PERF2BOLT-WARNING: no SPE branches found\n");

>From b911072d5f843459674b67598f3a5046c12028b9 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Fri, 17 Jan 2025 13:42:19 +0000
Subject: [PATCH 04/16] Addressing reviewers (2)

---
 bolt/test/perf2bolt/X86/perf2bolt-spe.test | 2 +-
 bolt/tools/driver/llvm-bolt.cpp            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/test/perf2bolt/X86/perf2bolt-spe.test b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
index f31c17f411137..ec24c44c4d13d 100644
--- a/bolt/test/perf2bolt/X86/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
@@ -6,4 +6,4 @@ RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.e
 RUN: touch %t.empty.perf.data
 RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s
 
-CHECK: BOLT-ERROR: -spe is available only on AArch64.
+CHECK: perf2bolt: -spe is available only on AArch64.
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index 2e91118c00a83..cf1b31f8c0c66 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -240,7 +240,7 @@ int main(int argc, char **argv) {
 
       if (opts::AggregateOnly && !RI.getBinaryContext().isAArch64() &&
           opts::ArmSPE) {
-        errs() << "BOLT-ERROR: -spe is available only on AArch64.\n";
+        errs() << ToolName << ": -spe is available only on AArch64.\n";
         exit(1);
       }
 

>From 7b40e4e018259c3294297e8675a4b20ae9fe8eed Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Wed, 19 Feb 2025 17:00:47 +0100
Subject: [PATCH 05/16] Add initial support for SPE brstack

Perf will be able to report SPE branch events in a format similar to the
LBR brstack.
Therefore, we can reuse the existing LBR parsing process for SPE as well.

Example of the SPE brstack input format:
```bash
perf script -i perf.data -F pid,brstack --itrace=bl
```
```
---
PID    FROM         TO           PREDICTED
---
16984  0x72e342e5f4/0x72e36192d0/M/-/-/11/RET/-
16984  0x72e7b8b3b4/0x72e7b8b3b8/PN/-/-/11/COND/-
16984  0x72e7b92b48/0x72e7b92b4c/PN/-/-/8/COND/-
16984  0x72eacc6b7c/0x760cc94b00/P/-/-/9/RET/-
16984  0x72e3f210fc/0x72e3f21068/P/-/-/4//-
16984  0x72e39b8c5c/0x72e3627b24/P/-/-/4//-
16984  0x72e7b89d20/0x72e7b92bbc/P/-/-/4/RET/-
```
The SPE brstack mispredicted flag might be two characters long: 'PN' or 'MN',
where 'N' means the branch was marked as NOT-TAKEN. This flag only applies to
conditional instructions (conditional branch or compare-and-branch) and
indicates that the instruction failed its condition code check.

Perf with 'brstack' support for SPE is available here:
```
https://github.com/Leo-Yan/linux/tree/perf_arm_spe_branch_flags_v2
```

Example of usage with SPE perf data:
```bash
perf2bolt -p perf.data -o perf.fdata --spe BINARY
```

Capture standard SPE branch events with perf:
```bash
perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
```

A unit test is also added to check the parsing of the SPE brstack format.
---
 bolt/lib/Profile/DataAggregator.cpp           | 70 ++++++++++++-------
 .../test/perf2bolt/AArch64/perf2bolt-spe.test | 15 ++--
 bolt/unittests/Profile/PerfSpeEvents.cpp      | 69 ++++++++++++++++++
 3 files changed, 122 insertions(+), 32 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index f1317ed3f99cf..a705107e0311a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -49,12 +49,10 @@ static cl::opt<bool>
                      cl::desc("aggregate basic samples (without LBR info)"),
                      cl::cat(AggregatorCategory));
 
-cl::opt<bool> ArmSPE(
-    "spe",
-    cl::desc(
-        "Enable Arm SPE mode. Used in conjuction with no-lbr mode, ie `--spe "
-        "--nl`"),
-    cl::cat(AggregatorCategory));
+cl::opt<bool> ArmSPE("spe",
+                     cl::desc("Enable Arm SPE mode. Can combine with `--nl` "
+                              "to use in no-lbr mode"),
+                     cl::cat(AggregatorCategory));
 
 static cl::opt<std::string>
     ITraceAggregation("itrace",
@@ -190,13 +188,16 @@ void DataAggregator::start() {
 
   if (opts::ArmSPE) {
     if (!opts::BasicAggregation) {
-      errs() << "PERF2BOLT-ERROR: Arm SPE mode is combined only with "
-                "BasicAggregation.\n";
-      exit(1);
+      // pid    from_ip      to_ip        predicted/missed not-taken?
+      // 12345  0x123/0x456/PN/-/-/8/RET/-
+      launchPerfProcess("SPE brstack events", MainEventsPPI,
+                        "script -F pid,brstack --itrace=bl",
+                        /*Wait = */ false);
+    } else {
+      launchPerfProcess("SPE branch events (non-lbr)", MainEventsPPI,
+                        "script -F pid,event,ip,addr --itrace=i1i",
+                        /*Wait = */ false);
     }
-    launchPerfProcess("branch events with SPE", MainEventsPPI,
-                      "script -F pid,event,ip,addr --itrace=i1i",
-                      /*Wait = */ false);
   } else if (opts::BasicAggregation) {
     launchPerfProcess("events without LBR", MainEventsPPI,
                       "script -F pid,event,ip",
@@ -530,7 +531,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   filterBinaryMMapInfo();
   prepareToParse("events", MainEventsPPI, ErrorCallback);
 
-  if (((!opts::BasicAggregation && !opts::ArmSPE) && parseBranchEvents()) ||
+  if ((!opts::BasicAggregation && parseBranchEvents()) ||
       (opts::BasicAggregation && opts::ArmSPE && parseSpeAsBasicEvents()) ||
       (opts::BasicAggregation && parseBasicEvents()))
     errs() << "PERF2BOLT: failed to parse samples\n";
@@ -1033,9 +1034,20 @@ ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() {
   if (std::error_code EC = MispredStrRes.getError())
     return EC;
   StringRef MispredStr = MispredStrRes.get();
-  if (MispredStr.size() != 1 ||
-      (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
-    reportError("expected single char for mispred bit");
+  // SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
+  bool ValidStrSize = opts::ArmSPE ?
+    MispredStr.size() >= 1 && MispredStr.size() <= 2 : MispredStr.size() == 1;
+  bool SpeTakenBitErr =
+         (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
+  bool PredictionBitErr =
+         !ValidStrSize ||
+         (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
+  if (SpeTakenBitErr)
+    reportError("expected 'N' as SPE prediction bit for a not-taken branch");
+  if (PredictionBitErr)
+    reportError("expected 'P', 'M' or '-' char as a prediction bit");
+
+ if (SpeTakenBitErr || PredictionBitErr) {
     Diag << "Found: " << MispredStr << "\n";
     return make_error_code(llvm::errc::io_error);
   }
@@ -1611,9 +1623,11 @@ void DataAggregator::printBranchStacksDiagnostics(
 }
 
 std::error_code DataAggregator::parseBranchEvents() {
-  outs() << "PERF2BOLT: parse branch events...\n";
-  NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
-                     TimerGroupDesc, opts::TimeAggregator);
+  std::string BranchEventTypeStr =
+      opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
+  outs() << "PERF2BOLT: " << BranchEventTypeStr << "...\n";
+  NamedRegionTimer T("parseBranch", "Parsing " + BranchEventTypeStr,
+                     TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
   uint64_t NumEntries = 0;
   uint64_t NumSamples = 0;
@@ -1639,7 +1653,8 @@ std::error_code DataAggregator::parseBranchEvents() {
     }
 
     NumEntries += Sample.LBR.size();
-    if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
+    if (this->BC->isX86() && BAT && Sample.LBR.size() == 32 &&
+        !NeedsSkylakeFix) {
       errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
       NeedsSkylakeFix = true;
     }
@@ -1658,10 +1673,17 @@ std::error_code DataAggregator::parseBranchEvents() {
     if (NumSamples && NumSamplesNoLBR == NumSamples) {
       // Note: we don't know if perf2bolt is being used to parse memory samples
       // at this point. In this case, it is OK to parse zero LBRs.
-      errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
-                "LBR. Record profile with perf record -j any or run perf2bolt "
-                "in no-LBR mode with -nl (the performance improvement in -nl "
-                "mode may be limited)\n";
+      if (!opts::ArmSPE)
+        errs()
+            << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
+               "LBR. Record profile with perf record -j any or run perf2bolt "
+               "in no-LBR mode with -nl (the performance improvement in -nl "
+               "mode may be limited)\n";
+      else
+        errs()
+            << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
+               "SPE brstack entries. Record profile with:"
+               "perf record arm_spe_0/branch_filter=1/";
     } else {
       printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
     }
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
index d7cea7ff769b8..95b8e205331a1 100644
--- a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -1,14 +1,13 @@
-## Check that Arm SPE mode is available on AArch64 with BasicAggregation.
+## Check that Arm SPE mode is available on AArch64.
 
 REQUIRES: system-linux,perf,target=aarch64{{.*}}
 
-RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
-RUN: touch %t.empty.perf.data
-RUN: perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
+RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe 2> /dev/null
 
-CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job
+RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe --nl %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
 
-RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe
-RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-LBR
+RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-LBR
+
+CHECK-SPE-NO-LBR: PERF2BOLT: spawning perf job to read SPE branch events (non-lbr)
+CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE brstack events
 
-CHECK-SPE-LBR: PERF2BOLT-ERROR: Arm SPE mode is combined only with BasicAggregation.
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index e52393b516fa3..9209f75147781 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -23,6 +23,7 @@ using namespace llvm::ELF;
 
 namespace opts {
 extern cl::opt<std::string> ReadPerfEvents;
+extern cl::opt<bool> ArmSPE;
 } // namespace opts
 
 namespace llvm {
@@ -38,6 +39,8 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   }
 
 protected:
+  using LBREntry = DataAggregator::LBREntry;
+
   void initalizeLLVM() {
     llvm::InitializeAllTargetInfos();
     llvm::InitializeAllTargetMCs();
@@ -88,6 +91,45 @@ struct PerfSpeEventsTestHelper : public testing::Test {
 
     return SampleSize == DA.BasicSamples.size();
   }
+
+  /// Compare LBREntries
+  bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
+    return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
+           Lhs.Mispred == Rhs.Mispred;
+  }
+
+  /// Parse and check SPE brstack as LBR
+  void parseAndCheckBrstackEvents(
+      uint64_t PID,
+      const std::vector<SmallVector<LBREntry, 2>> &ExpectedSamples) {
+    int NumSamples = 0;
+
+    DataAggregator DA("<pseudo input>");
+    DA.ParsingBuf = opts::ReadPerfEvents;
+    DA.BC = BC.get();
+    DataAggregator::MMapInfo MMap;
+    DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
+
+    // Process buffer.
+    while (DA.hasData()) {
+      ErrorOr<DataAggregator::PerfBranchSample> SampleRes =
+          DA.parseBranchSample();
+      if (std::error_code EC = SampleRes.getError())
+        EXPECT_NE(EC, std::errc::no_such_process);
+
+      DataAggregator::PerfBranchSample &Sample = SampleRes.get();
+      EXPECT_EQ(Sample.LBR.size(), ExpectedSamples[NumSamples].size());
+
+      // Check the parsed LBREntries.
+      const auto *ActualIter = Sample.LBR.begin();
+      const auto *ExpectIter = ExpectedSamples[NumSamples].begin();
+      while (ActualIter != Sample.LBR.end() &&
+             ExpectIter != ExpectedSamples[NumSamples].end())
+        EXPECT_TRUE(checkLBREntry(*ActualIter++, *ExpectIter++));
+
+      ++NumSamples;
+    }
+  }
 };
 
 } // namespace bolt
@@ -113,6 +155,33 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
   EXPECT_TRUE(checkEvents(1234, 10, {"branches-spe:"}));
 }
 
+TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
+  // Check perf input with SPE branch events as brstack format.
+  // Example collection command:
+  // ```
+  // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+  // ```
+  // How Bolt extracts the branch events:
+  // ```
+  // perf script -F pid,brstack --itrace=bl
+  // ```
+
+  opts::ArmSPE = true;
+  opts::ReadPerfEvents = "  1234  0xa001/0xa002/PN/-/-/10/COND/-\n"
+                         "  1234  0xb001/0xb002/P/-/-/4/RET/-\n"
+                         "  1234  0xc001/0xc002/P/-/-/13/-/-\n"
+                         "  1234  0xd001/0xd002/M/-/-/7/RET/-\n"
+                         "  1234  0xe001/0xe002/P/-/-/14/RET/-\n"
+                         "  1234  0xf001/0xf002/MN/-/-/8/COND/-\n";
+
+  std::vector<SmallVector<LBREntry, 2>> ExpectedSamples = {
+      {{{0xa001, 0xa002, false}}}, {{{0xb001, 0xb002, false}}},
+      {{{0xc001, 0xc002, false}}}, {{{0xd001, 0xd002, true}}},
+      {{{0xe001, 0xe002, false}}}, {{{0xf001, 0xf002, true}}},
+  };
+  parseAndCheckBrstackEvents(1234, ExpectedSamples);
+}
+
 TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
   // Check perf input with SPE branch events and cycles.
   // Example collection command:

>From 0edb27098a2382e42f4174c083af878c34cde2e3 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Thu, 10 Apr 2025 15:37:29 +0200
Subject: [PATCH 06/16] Fix format issue

---
 bolt/lib/Profile/DataAggregator.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index a705107e0311a..53880dbfc670a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1035,19 +1035,20 @@ ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() {
     return EC;
   StringRef MispredStr = MispredStrRes.get();
   // SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
-  bool ValidStrSize = opts::ArmSPE ?
-    MispredStr.size() >= 1 && MispredStr.size() <= 2 : MispredStr.size() == 1;
+  bool ValidStrSize = opts::ArmSPE
+                          ? MispredStr.size() >= 1 && MispredStr.size() <= 2
+                          : MispredStr.size() == 1;
   bool SpeTakenBitErr =
-         (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
+      (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
   bool PredictionBitErr =
-         !ValidStrSize ||
-         (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
+      !ValidStrSize ||
+      (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
   if (SpeTakenBitErr)
     reportError("expected 'N' as SPE prediction bit for a not-taken branch");
   if (PredictionBitErr)
     reportError("expected 'P', 'M' or '-' char as a prediction bit");
 
- if (SpeTakenBitErr || PredictionBitErr) {
+  if (SpeTakenBitErr || PredictionBitErr) {
     Diag << "Found: " << MispredStr << "\n";
     return make_error_code(llvm::errc::io_error);
   }

>From 27f1e13c1dd894fdf9f9d11e716e334073500351 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Tue, 15 Apr 2025 12:50:16 +0200
Subject: [PATCH 07/16] Fix typo

---
 bolt/lib/Profile/DataAggregator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 53880dbfc670a..cfe7b410e3cee 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1625,7 +1625,7 @@ void DataAggregator::printBranchStacksDiagnostics(
 
 std::error_code DataAggregator::parseBranchEvents() {
   std::string BranchEventTypeStr =
-      opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
+      !opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
   outs() << "PERF2BOLT: " << BranchEventTypeStr << "...\n";
   NamedRegionTimer T("parseBranch", "Parsing " + BranchEventTypeStr,
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);

>From 41441ae520d18b4f96cecb5b70ebf866de0a2c75 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Thu, 29 May 2025 09:42:03 +0200
Subject: [PATCH 08/16] Removing dependency of the SPE BasicAggregation

This commit aims to decouple the SPE BRStack and SPE BasicAggregation approaches,
based on the decision in issue #115333.

The BRStack change relies on the unit test logic that was introduced by
Paschalis Mpeis (ARM) in #120741. Since that logic is shared by both aggregation
techniques, an essential part of it needs to be retained.

All tests relevant to BasicAggregation are removed.

Co-Authored-By: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
---
 bolt/include/bolt/Profile/DataAggregator.h    |  12 --
 bolt/lib/Profile/DataAggregator.cpp           | 131 ++----------------
 .../test/perf2bolt/AArch64/perf2bolt-spe.test |   3 -
 bolt/unittests/Profile/PerfSpeEvents.cpp      |  96 -------------
 4 files changed, 8 insertions(+), 234 deletions(-)

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index d5110eac09ac2..ce9a6630a4d2c 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -288,15 +288,6 @@ class DataAggregator : public DataReader {
   /// and a PC
   ErrorOr<PerfBasicSample> parseBasicSample();
 
-  /// Parse an Arm SPE entry into the non-lbr format by generating two basic
-  /// samples. The format of an input SPE entry is:
-  /// ```
-  /// PID   EVENT-TYPE   ADDR   IP
-  /// ```
-  /// SPE branch events will have 'ADDR' set to a branch target address while
-  /// other perf or SPE events will have it set to zero.
-  ErrorOr<std::pair<PerfBasicSample, PerfBasicSample>> parseSpeAsBasicSamples();
-
   /// Parse a single perf sample containing a PID associated with an IP and
   /// address.
   ErrorOr<PerfMemSample> parseMemSample();
@@ -343,9 +334,6 @@ class DataAggregator : public DataReader {
   /// Process non-LBR events.
   void processBasicEvents();
 
-  /// Parse Arm SPE events into the non-LBR format.
-  std::error_code parseSpeAsBasicEvents();
-
   /// Parse the full output generated by perf script to report memory events.
   std::error_code parseMemEvents();
 
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index cfe7b410e3cee..6dc6499fdd335 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -187,17 +187,11 @@ void DataAggregator::start() {
   findPerfExecutable();
 
   if (opts::ArmSPE) {
-    if (!opts::BasicAggregation) {
-      // pid    from_ip      to_ip        predicted/missed not-taken?
-      // 12345  0x123/0x456/PN/-/-/8/RET/-
-      launchPerfProcess("SPE brstack events", MainEventsPPI,
-                        "script -F pid,brstack --itrace=bl",
-                        /*Wait = */ false);
-    } else {
-      launchPerfProcess("SPE branch events (non-lbr)", MainEventsPPI,
-                        "script -F pid,event,ip,addr --itrace=i1i",
-                        /*Wait = */ false);
-    }
+    // pid    from_ip      to_ip        predicted/missed not-taken?
+    // 12345  0x123/0x456/PN/-/-/8/RET/-
+    launchPerfProcess("SPE brstack events", MainEventsPPI,
+                      "script -F pid,brstack --itrace=bl",
+                      /*Wait = */ false);
   } else if (opts::BasicAggregation) {
     launchPerfProcess("events without LBR", MainEventsPPI,
                       "script -F pid,event,ip",
@@ -472,20 +466,14 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   this->BC = &BC;
 
-  const Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
-                     "Cannot print 'addr' field.");
-
-  auto ErrorCallback = [&NoData](int ReturnCode, StringRef ErrBuf) {
-    if (opts::ArmSPE && NoData.match(ErrBuf)) {
-      errs() << "PERF2BOLT-ERROR: perf data are incompatible for Arm SPE mode "
-                "consumption. ADDR attribute is unset.\n";
-      exit(1);
-    }
+  auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
     errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
     exit(1);
   };
 
   auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
+    Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
+                 "Cannot print 'addr' field.");
     if (!NoData.match(ErrBuf))
       ErrorCallback(ReturnCode, ErrBuf);
   };
@@ -532,7 +520,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   prepareToParse("events", MainEventsPPI, ErrorCallback);
 
   if ((!opts::BasicAggregation && parseBranchEvents()) ||
-      (opts::BasicAggregation && opts::ArmSPE && parseSpeAsBasicEvents()) ||
       (opts::BasicAggregation && parseBasicEvents()))
     errs() << "PERF2BOLT: failed to parse samples\n";
 
@@ -1172,68 +1159,6 @@ ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
   return PerfBasicSample{Event.get(), Address};
 }
 
-ErrorOr<
-    std::pair<DataAggregator::PerfBasicSample, DataAggregator::PerfBasicSample>>
-DataAggregator::parseSpeAsBasicSamples() {
-  while (checkAndConsumeFS()) {
-  }
-
-  ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
-  if (std::error_code EC = PIDRes.getError())
-    return EC;
-
-  constexpr PerfBasicSample EmptySample = PerfBasicSample{StringRef(), 0};
-  auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
-  if (MMapInfoIter == BinaryMMapInfo.end()) {
-    consumeRestOfLine();
-    return std::make_pair(EmptySample, EmptySample);
-  }
-
-  while (checkAndConsumeFS()) {
-  }
-
-  ErrorOr<StringRef> Event = parseString(FieldSeparator);
-  if (std::error_code EC = Event.getError())
-    return EC;
-
-  while (checkAndConsumeFS()) {
-  }
-
-  ErrorOr<uint64_t> AddrResTo = parseHexField(FieldSeparator);
-  if (std::error_code EC = AddrResTo.getError())
-    return EC;
-
-  consumeAllRemainingFS();
-
-  ErrorOr<uint64_t> AddrResFrom = parseHexField(FieldSeparator, true);
-  if (std::error_code EC = AddrResFrom.getError())
-    return EC;
-
-  if (!checkAndConsumeNewLine()) {
-    reportError("expected end of line");
-    return make_error_code(llvm::errc::io_error);
-  }
-
-  auto genBasicSample = [&](uint64_t Address) {
-    // When fed with non SPE branch events the target address will be null.
-    // This is expected and ignored.
-    if (Address == 0x0)
-      return EmptySample;
-
-    if (!BC->HasFixedLoadAddress)
-      adjustAddress(Address, MMapInfoIter->second);
-
-    return PerfBasicSample{Event.get(), Address};
-  };
-
-  // Show more meaningful event names on boltdata.
-  if (Event->str() == "instructions:")
-    Event = *AddrResTo != 0x0 ? "branches-spe:" : "instructions-spe:";
-
-  return std::make_pair(genBasicSample(*AddrResFrom),
-                        genBasicSample(*AddrResTo));
-}
-
 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
   PerfMemSample Res{0, 0};
 
@@ -1743,46 +1668,6 @@ std::error_code DataAggregator::parseBasicEvents() {
   return std::error_code();
 }
 
-std::error_code DataAggregator::parseSpeAsBasicEvents() {
-  outs() << "PERF2BOLT: parsing SPE data as basic events (no LBR)...\n";
-  NamedRegionTimer T("parseSPEBasic", "Parsing SPE as basic events",
-                     TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
-  uint64_t NumSpeBranchSamples = 0;
-
-  // Convert entries to one or two basic samples, depending on whether there is
-  // branch target information.
-  while (hasData()) {
-    auto SamplePair = parseSpeAsBasicSamples();
-    if (std::error_code EC = SamplePair.getError())
-      return EC;
-
-    auto registerSample = [this](const PerfBasicSample *Sample) {
-      if (!Sample->PC)
-        return;
-
-      if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
-        BF->setHasProfileAvailable();
-
-      ++BasicSamples[Sample->PC];
-      EventNames.insert(Sample->EventName);
-    };
-
-    if (SamplePair->first.PC != 0x0 && SamplePair->second.PC != 0x0)
-      ++NumSpeBranchSamples;
-
-    registerSample(&SamplePair->first);
-    registerSample(&SamplePair->second);
-  }
-
-  if (NumSpeBranchSamples == 0)
-    errs() << "PERF2BOLT-WARNING: no SPE branches found\n";
-  else
-    outs() << "PERF2BOLT: found " << NumSpeBranchSamples
-           << " SPE branch sample pairs.\n";
-
-  return std::error_code();
-}
-
 void DataAggregator::processBasicEvents() {
   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
index 95b8e205331a1..11cb4b5b762d2 100644
--- a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -4,10 +4,7 @@ REQUIRES: system-linux,perf,target=aarch64{{.*}}
 
 RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe 2> /dev/null
 
-RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe --nl %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
-
 RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-LBR
 
-CHECK-SPE-NO-LBR: PERF2BOLT: spawning perf job to read SPE branch events (non-lbr)
 CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE brstack events
 
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 9209f75147781..205cc742b6268 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -73,25 +73,6 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   std::unique_ptr<ObjectFile> ObjFile;
   std::unique_ptr<BinaryContext> BC;
 
-  /// Return true when the expected \p SampleSize profile data are generated and
-  /// contain all the \p ExpectedEventNames.
-  bool checkEvents(uint64_t PID, size_t SampleSize,
-                   const StringSet<> &ExpectedEventNames) {
-    DataAggregator DA("<pseudo input>");
-    DA.ParsingBuf = opts::ReadPerfEvents;
-    DA.BC = BC.get();
-    DataAggregator::MMapInfo MMap;
-    DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
-
-    DA.parseSpeAsBasicEvents();
-
-    for (auto &EE : ExpectedEventNames)
-      if (!DA.EventNames.contains(EE.first()))
-        return false;
-
-    return SampleSize == DA.BasicSamples.size();
-  }
-
   /// Compare LBREntries
   bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
     return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
@@ -135,26 +116,6 @@ struct PerfSpeEventsTestHelper : public testing::Test {
 } // namespace bolt
 } // namespace llvm
 
-// Check that DataAggregator can parseSpeAsBasicEvents for branch events when
-// combined with other event types.
-
-TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
-  // Check perf input with SPE branch events.
-  // Example collection command:
-  // ```
-  // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
-  // ```
-
-  opts::ReadPerfEvents =
-      "1234          instructions:              a002    a001\n"
-      "1234          instructions:              b002    b001\n"
-      "1234          instructions:              c002    c001\n"
-      "1234          instructions:              d002    d001\n"
-      "1234          instructions:              e002    e001\n";
-
-  EXPECT_TRUE(checkEvents(1234, 10, {"branches-spe:"}));
-}
-
 TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
   // Check perf input with SPE branch events as brstack format.
   // Example collection command:
@@ -182,61 +143,4 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
   parseAndCheckBrstackEvents(1234, ExpectedSamples);
 }
 
-TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
-  // Check perf input with SPE branch events and cycles.
-  // Example collection command:
-  // ```
-  // perf record -e cycles:u -e 'arm_spe_0/branch_filter=1/u' -- BINARY
-  // ```
-
-  opts::ReadPerfEvents =
-      "1234          instructions:              a002    a001\n"
-      "1234              cycles:u:                 0    b001\n"
-      "1234              cycles:u:                 0    c001\n"
-      "1234          instructions:              d002    d001\n"
-      "1234          instructions:              e002    e001\n";
-
-  EXPECT_TRUE(checkEvents(1234, 8, {"branches-spe:", "cycles:u:"}));
-}
-
-TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
-  // Check perf input with any SPE event type and cycles.
-  // Example collection command:
-  // ```
-  // perf record -e cycles:u -e 'arm_spe_0//u' -- BINARY
-  // ```
-
-  opts::ReadPerfEvents =
-      "1234              cycles:u:                0     a001\n"
-      "1234              cycles:u:                0     b001\n"
-      "1234          instructions:                0     c001\n"
-      "1234          instructions:                0     d001\n"
-      "1234          instructions:              e002    e001\n";
-
-  EXPECT_TRUE(checkEvents(1234, 6,
-                          {"cycles:u:", "instructions-spe:", "branches-spe:"}));
-}
-
-TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
-  // Check perf input that has no SPE branch pairs recorded.
-  // Example collection command:
-  // ```
-  // perf record -e cycles:u -e 'arm_spe_0/load_filter=1,branch_filter=0/u' --
-  // BINARY
-  // ```
-
-  testing::internal::CaptureStderr();
-  opts::ReadPerfEvents =
-      "1234          instructions:                 0    a001\n"
-      "1234              cycles:u:                 0    b001\n"
-      "1234          instructions:                 0    c001\n"
-      "1234              cycles:u:                 0    d001\n"
-      "1234          instructions:                 0    e001\n";
-
-  EXPECT_TRUE(checkEvents(1234, 5, {"instructions-spe:", "cycles:u:"}));
-
-  std::string Stderr = testing::internal::GetCapturedStderr();
-  EXPECT_EQ(Stderr, "PERF2BOLT-WARNING: no SPE branches found\n");
-}
-
 #endif

>From 5be2b19906a1cafc4c4fe48fe702fc8950740c03 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Thu, 5 Jun 2025 15:06:28 +0200
Subject: [PATCH 09/16] Address reviewers 2

---
 bolt/lib/Profile/DataAggregator.cpp           | 21 ++++++++++++-------
 .../test/perf2bolt/AArch64/perf2bolt-spe.test |  2 ++
 bolt/test/perf2bolt/X86/perf2bolt-spe.test    |  2 +-
 bolt/unittests/Profile/PerfSpeEvents.cpp      | 15 ++++++-------
 4 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 6dc6499fdd335..63bc2b2cfe8ca 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -49,9 +49,7 @@ static cl::opt<bool>
                      cl::desc("aggregate basic samples (without LBR info)"),
                      cl::cat(AggregatorCategory));
 
-cl::opt<bool> ArmSPE("spe",
-                     cl::desc("Enable Arm SPE mode. Can combine with `--nl` "
-                              "to use in no-lbr mode"),
+cl::opt<bool> ArmSPE("spe", cl::desc("Enable Arm SPE mode."),
                      cl::cat(AggregatorCategory));
 
 static cl::opt<std::string>
@@ -187,7 +185,10 @@ void DataAggregator::start() {
   findPerfExecutable();
 
   if (opts::ArmSPE) {
-    // pid    from_ip      to_ip        predicted/missed not-taken?
+    // pid    from_ip      to_ip        flags
+    // where flags could be:
+    // P/M: whether branch was Predicted or Mispredicted.
+    // N: optionally appears when the branch was Not-Taken (ie fall-through)
     // 12345  0x123/0x456/PN/-/-/8/RET/-
     launchPerfProcess("SPE brstack events", MainEventsPPI,
                       "script -F pid,brstack --itrace=bl",
@@ -1021,7 +1022,8 @@ ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() {
   if (std::error_code EC = MispredStrRes.getError())
     return EC;
   StringRef MispredStr = MispredStrRes.get();
-  // SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
+  // SPE brstack mispredicted flags might be up to two characters long:
+  // 'PN' or 'MN'. Where 'N' optionally appears.
   bool ValidStrSize = opts::ArmSPE
                           ? MispredStr.size() >= 1 && MispredStr.size() <= 2
                           : MispredStr.size() == 1;
@@ -1551,7 +1553,7 @@ void DataAggregator::printBranchStacksDiagnostics(
 std::error_code DataAggregator::parseBranchEvents() {
   std::string BranchEventTypeStr =
       !opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
-  outs() << "PERF2BOLT: " << BranchEventTypeStr << "...\n";
+  outs() << "PERF2BOLT: parse " << BranchEventTypeStr << "...\n";
   NamedRegionTimer T("parseBranch", "Parsing " + BranchEventTypeStr,
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
@@ -1608,8 +1610,11 @@ std::error_code DataAggregator::parseBranchEvents() {
       else
         errs()
             << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
-               "SPE brstack entries. Record profile with:"
-               "perf record arm_spe_0/branch_filter=1/";
+               "SPE brstack entries. The minimum required version of "
+               "Linux-perf is v6.14 or higher for brstack support. "
+               "With an older Linux-perf you may get zero samples. "
+               "Plese also make sure about you recorded profile with: "
+               "perf record -e 'arm_spe_0/branch_filter=1/'.";
     } else {
       printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
     }
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
index 11cb4b5b762d2..2ee62976da4d9 100644
--- a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -2,6 +2,8 @@
 
 REQUIRES: system-linux,perf,target=aarch64{{.*}}
 
+RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
+
 RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe 2> /dev/null
 
 RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-LBR
diff --git a/bolt/test/perf2bolt/X86/perf2bolt-spe.test b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
index ec24c44c4d13d..8eed2c8595098 100644
--- a/bolt/test/perf2bolt/X86/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
@@ -4,6 +4,6 @@ REQUIRES: system-linux,x86_64-linux
 
 RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
 RUN: touch %t.empty.perf.data
-RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s
+RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --spe --pa %t.exe 2>&1 | FileCheck %s
 
 CHECK: perf2bolt: -spe is available only on AArch64.
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 205cc742b6268..b8ff0a1d972a3 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -73,13 +73,13 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   std::unique_ptr<ObjectFile> ObjFile;
   std::unique_ptr<BinaryContext> BC;
 
-  /// Compare LBREntries
+  // @return true if LBREntries are equal.
   bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
     return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
            Lhs.Mispred == Rhs.Mispred;
   }
 
-  /// Parse and check SPE brstack as LBR
+  // Parse and check SPE brstack as LBR.
   void parseAndCheckBrstackEvents(
       uint64_t PID,
       const std::vector<SmallVector<LBREntry, 2>> &ExpectedSamples) {
@@ -102,12 +102,9 @@ struct PerfSpeEventsTestHelper : public testing::Test {
       EXPECT_EQ(Sample.LBR.size(), ExpectedSamples[NumSamples].size());
 
       // Check the parsed LBREntries.
-      const auto *ActualIter = Sample.LBR.begin();
-      const auto *ExpectIter = ExpectedSamples[NumSamples].begin();
-      while (ActualIter != Sample.LBR.end() &&
-             ExpectIter != ExpectedSamples[NumSamples].end())
-        EXPECT_TRUE(checkLBREntry(*ActualIter++, *ExpectIter++));
-
+      for (auto [Actual, Expected] :
+           zip_equal(Sample.LBR, ExpectedSamples[NumSamples]))
+        EXPECT_TRUE(checkLBREntry(Actual, Expected));
       ++NumSamples;
     }
   }
@@ -135,7 +132,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
                          "  1234  0xe001/0xe002/P/-/-/14/RET/-\n"
                          "  1234  0xf001/0xf002/MN/-/-/8/COND/-\n";
 
-  std::vector<SmallVector<LBREntry, 2>> ExpectedSamples = {
+  std::vector<SmallVector<LBREntry>> ExpectedSamples = {
       {{{0xa001, 0xa002, false}}}, {{{0xb001, 0xb002, false}}},
       {{{0xc001, 0xc002, false}}}, {{{0xd001, 0xd002, true}}},
       {{{0xe001, 0xe002, false}}}, {{{0xf001, 0xf002, true}}},

>From 80bdb41e8b428997e1912f2e05981a4a8f16b6f4 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Tue, 10 Jun 2025 17:40:09 +0200
Subject: [PATCH 10/16] Simplifies SpeBranchesWithBrstack testcase

The test can be simplified after PR #143288, since the validation
phase was removed from parseLBRSample. The BranchLBRs container can
now be used directly for testing. Previously, when BOLT was supplied
with mock addresses, the BranchLBRs container was left empty because
of the validation phase.
---
 bolt/unittests/Profile/PerfSpeEvents.cpp | 44 ++++++------------------
 1 file changed, 10 insertions(+), 34 deletions(-)

diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index b8ff0a1d972a3..674af93578486 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -39,7 +39,7 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   }
 
 protected:
-  using LBREntry = DataAggregator::LBREntry;
+  using Trace = DataAggregator::Trace;
 
   void initalizeLLVM() {
     llvm::InitializeAllTargetInfos();
@@ -73,40 +73,20 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   std::unique_ptr<ObjectFile> ObjFile;
   std::unique_ptr<BinaryContext> BC;
 
-  // @return true if LBREntries are equal.
-  bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
-    return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
-           Lhs.Mispred == Rhs.Mispred;
-  }
-
   // Parse and check SPE brstack as LBR.
-  void parseAndCheckBrstackEvents(
-      uint64_t PID,
-      const std::vector<SmallVector<LBREntry, 2>> &ExpectedSamples) {
-    int NumSamples = 0;
-
+  void parseAndCheckBrstackEvents(uint64_t PID, uint64_t From, uint64_t To,
+                                  uint64_t Count, size_t SampleSize) {
     DataAggregator DA("<pseudo input>");
     DA.ParsingBuf = opts::ReadPerfEvents;
     DA.BC = BC.get();
     DataAggregator::MMapInfo MMap;
     DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
 
-    // Process buffer.
-    while (DA.hasData()) {
-      ErrorOr<DataAggregator::PerfBranchSample> SampleRes =
-          DA.parseBranchSample();
-      if (std::error_code EC = SampleRes.getError())
-        EXPECT_NE(EC, std::errc::no_such_process);
-
-      DataAggregator::PerfBranchSample &Sample = SampleRes.get();
-      EXPECT_EQ(Sample.LBR.size(), ExpectedSamples[NumSamples].size());
-
-      // Check the parsed LBREntries.
-      for (auto [Actual, Expected] :
-           zip_equal(Sample.LBR, ExpectedSamples[NumSamples]))
-        EXPECT_TRUE(checkLBREntry(Actual, Expected));
-      ++NumSamples;
-    }
+    DA.parseBranchEvents();
+
+    EXPECT_EQ(DA.BranchLBRs.size(), SampleSize);
+    EXPECT_EQ(DA.BranchLBRs[Trace(From, To)].MispredCount, Count);
+    EXPECT_EQ(DA.BranchLBRs[Trace(From, To)].TakenCount, Count);
   }
 };
 
@@ -130,14 +110,10 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
                          "  1234  0xc001/0xc002/P/-/-/13/-/-\n"
                          "  1234  0xd001/0xd002/M/-/-/7/RET/-\n"
                          "  1234  0xe001/0xe002/P/-/-/14/RET/-\n"
+                         "  1234  0xd001/0xd002/M/-/-/7/RET/-\n"
                          "  1234  0xf001/0xf002/MN/-/-/8/COND/-\n";
 
-  std::vector<SmallVector<LBREntry>> ExpectedSamples = {
-      {{{0xa001, 0xa002, false}}}, {{{0xb001, 0xb002, false}}},
-      {{{0xc001, 0xc002, false}}}, {{{0xd001, 0xd002, true}}},
-      {{{0xe001, 0xe002, false}}}, {{{0xf001, 0xf002, true}}},
-  };
-  parseAndCheckBrstackEvents(1234, ExpectedSamples);
+  parseAndCheckBrstackEvents(1234, 0xd001, 0xd002, 2, 6);
 }
 
 #endif

>From b2f5aa84eca9a91533d6eb88bb52717686bfa2c7 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Thu, 12 Jun 2025 18:36:14 +0200
Subject: [PATCH 11/16] Added the suggested changes

---
 bolt/unittests/Profile/PerfSpeEvents.cpp | 58 +++++++++++++++++++-----
 1 file changed, 47 insertions(+), 11 deletions(-)

diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 674af93578486..4df1c7d62f56a 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -40,6 +40,12 @@ struct PerfSpeEventsTestHelper : public testing::Test {
 
 protected:
   using Trace = DataAggregator::Trace;
+  struct MockBranchInfo {
+    uint64_t From;
+    uint64_t To;
+    uint64_t TakenCount;
+    uint64_t MispredCount;
+  };
 
   void initalizeLLVM() {
     llvm::InitializeAllTargetInfos();
@@ -74,8 +80,8 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   std::unique_ptr<BinaryContext> BC;
 
   // Parse and check SPE brstack as LBR.
-  void parseAndCheckBrstackEvents(uint64_t PID, uint64_t From, uint64_t To,
-                                  uint64_t Count, size_t SampleSize) {
+  void parseAndCheckBrstackEvents(
+      uint64_t PID, const std::vector<MockBranchInfo> &ExpectedSamples) {
     DataAggregator DA("<pseudo input>");
     DA.ParsingBuf = opts::ReadPerfEvents;
     DA.BC = BC.get();
@@ -84,9 +90,27 @@ struct PerfSpeEventsTestHelper : public testing::Test {
 
     DA.parseBranchEvents();
 
-    EXPECT_EQ(DA.BranchLBRs.size(), SampleSize);
-    EXPECT_EQ(DA.BranchLBRs[Trace(From, To)].MispredCount, Count);
-    EXPECT_EQ(DA.BranchLBRs[Trace(From, To)].TakenCount, Count);
+    EXPECT_EQ(DA.BranchLBRs.size(), ExpectedSamples.size());
+    if (DA.BranchLBRs.size() != ExpectedSamples.size()) {
+      // Simple export where they differ
+      llvm::errs() << "BranchLBRs items: \n";
+      for (const auto &AggrLBR : DA.BranchLBRs)
+        llvm::errs() << "{" << AggrLBR.first.From << ", " << AggrLBR.first.To
+                     << ", " << AggrLBR.second.TakenCount << ", "
+                     << AggrLBR.second.MispredCount << "}" << "\n";
+
+      llvm::errs() << "Expected items: \n";
+      for (const MockBranchInfo &BI : ExpectedSamples)
+        llvm::errs() << "{" << BI.From << ", " << BI.To << ", " << BI.TakenCount
+                     << ", " << BI.MispredCount << "}" << "\n";
+    } else {
+      for (const MockBranchInfo &BI : ExpectedSamples) {
+        EXPECT_EQ(DA.BranchLBRs.at(Trace(BI.From, BI.To)).MispredCount,
+                  BI.MispredCount);
+        EXPECT_EQ(DA.BranchLBRs.at(Trace(BI.From, BI.To)).TakenCount,
+                  BI.TakenCount);
+      }
+    }
   }
 };
 
@@ -107,13 +131,25 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
   opts::ArmSPE = true;
   opts::ReadPerfEvents = "  1234  0xa001/0xa002/PN/-/-/10/COND/-\n"
                          "  1234  0xb001/0xb002/P/-/-/4/RET/-\n"
-                         "  1234  0xc001/0xc002/P/-/-/13/-/-\n"
-                         "  1234  0xd001/0xd002/M/-/-/7/RET/-\n"
+                         "  1234  0xc456/0xc789/P/-/-/13/-/-\n"
+                         "  1234  0xd123/0xd456/M/-/-/7/RET/-\n"
                          "  1234  0xe001/0xe002/P/-/-/14/RET/-\n"
-                         "  1234  0xd001/0xd002/M/-/-/7/RET/-\n"
-                         "  1234  0xf001/0xf002/MN/-/-/8/COND/-\n";
-
-  parseAndCheckBrstackEvents(1234, 0xd001, 0xd002, 2, 6);
+                         "  1234  0xd123/0xd456/M/-/-/7/RET/-\n"
+                         "  1234  0xf001/0xf002/MN/-/-/8/COND/-\n"
+                         "  1234  0xc456/0xc789/M/-/-/13/-/-\n";
+
+  // MockBranchInfo contains the aggregated information about
+  // a Branch {From, To, TakenCount, MispredCount}.
+  // Let's check the following example: {0xd123, 0xd456, 2, 2}.
+  // This entry has a TakenCount = 2,
+  // as we have two samples for (0xd123, 0xd456) in our input.
+  // It also has MispredsCount = 2, as 'M' misprediction flag
+  // appears in both cases.
+  std::vector<MockBranchInfo> ExpectedSamples = {
+      {0xa001, 0xa002, 1, 0}, {0xb001, 0xb002, 1, 0}, {0xc456, 0xc789, 2, 1},
+      {0xd123, 0xd456, 2, 2}, {0xe001, 0xe002, 1, 0}, {0xf001, 0xf002, 1, 1}};
+
+  parseAndCheckBrstackEvents(1234, ExpectedSamples);
 }
 
 #endif

>From 5651631be9d2618a3fe80de632f5596592a83e7a Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Fri, 13 Jun 2025 09:25:05 +0200
Subject: [PATCH 12/16] Fixes typo

---
 bolt/lib/Profile/DataAggregator.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 63bc2b2cfe8ca..001d9a7d2a7bc 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1609,12 +1609,10 @@ std::error_code DataAggregator::parseBranchEvents() {
                "mode may be limited)\n";
       else
         errs()
-            << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
-               "SPE brstack entries. The minimum required version of "
-               "Linux-perf is v6.14 or higher for brstack support. "
-               "With an older Linux-perf you may get zero samples. "
-               "Plese also make sure about you recorded profile with: "
-               "perf record -e 'arm_spe_0/branch_filter=1/'.";
+            << "PERF2BOLT-WARNING: All recorded samples for this binary lack "
+               "SPE brstack entries. Make sure you are running Linux perf 6.14 "
+               "or later, otherwise you get zero samples. Record the profile with:"
+               "perf record -e 'arm_spe_0/branch_filter=1/'."
     } else {
       printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
     }

>From 0ec7079d329dfc7ac5c845b2260db7afd43bf0da Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Fri, 13 Jun 2025 09:32:03 +0200
Subject: [PATCH 13/16] clang format fix

---
 bolt/lib/Profile/DataAggregator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 001d9a7d2a7bc..be557154f7d00 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1611,8 +1611,8 @@ std::error_code DataAggregator::parseBranchEvents() {
         errs()
             << "PERF2BOLT-WARNING: All recorded samples for this binary lack "
                "SPE brstack entries. Make sure you are running Linux perf 6.14 "
-               "or later, otherwise you get zero samples. Record the profile with:"
-               "perf record -e 'arm_spe_0/branch_filter=1/'."
+               "or later, otherwise you get zero samples. Record the profile "
+               "with: perf record -e 'arm_spe_0/branch_filter=1/'.";
     } else {
       printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
     }

>From 18ba358bbf93dc7f5a583319fda5ff7d3350609b Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Mon, 16 Jun 2025 12:39:40 +0200
Subject: [PATCH 14/16] Updated the test case.

---
 bolt/unittests/Profile/CMakeLists.txt    |  2 +
 bolt/unittests/Profile/PerfSpeEvents.cpp | 50 +++++++++++++++---------
 2 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/bolt/unittests/Profile/CMakeLists.txt b/bolt/unittests/Profile/CMakeLists.txt
index ce01c6c4b949e..197b0dcade269 100644
--- a/bolt/unittests/Profile/CMakeLists.txt
+++ b/bolt/unittests/Profile/CMakeLists.txt
@@ -22,4 +22,6 @@ target_link_libraries(ProfileTests
 foreach (tgt ${BOLT_TARGETS_TO_BUILD})
   string(TOUPPER "${tgt}" upper)
   target_compile_definitions(ProfileTests PRIVATE "${upper}_AVAILABLE")
+  # Enable exceptions for GTEST EXPECT_NO_THROW
+  target_compile_options(ProfileTests PRIVATE "-fexceptions")
 endforeach()
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 4df1c7d62f56a..89e8093a26f40 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -40,6 +40,8 @@ struct PerfSpeEventsTestHelper : public testing::Test {
 
 protected:
   using Trace = DataAggregator::Trace;
+  using TakenBranchInfo = DataAggregator::TakenBranchInfo;
+  using TraceHash = DataAggregator::TraceHash;
   struct MockBranchInfo {
     uint64_t From;
     uint64_t To;
@@ -79,6 +81,23 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   std::unique_ptr<ObjectFile> ObjFile;
   std::unique_ptr<BinaryContext> BC;
 
+  // Helper function to export lists to show the mismatch
+  void exportBrStackEventMismatch(
+      const std::unordered_map<Trace, TakenBranchInfo, TraceHash> &BranchLBRs,
+      const std::vector<MockBranchInfo> &ExpectedSamples) {
+    // Simple export where they differ
+    llvm::errs() << "BranchLBRs items: \n";
+    for (const auto &AggrLBR : BranchLBRs)
+      llvm::errs() << "{" << AggrLBR.first.From << ", " << AggrLBR.first.To
+                   << ", " << AggrLBR.second.TakenCount << ", "
+                   << AggrLBR.second.MispredCount << "}" << "\n";
+
+    llvm::errs() << "Expected items: \n";
+    for (const MockBranchInfo &BI : ExpectedSamples)
+      llvm::errs() << "{" << BI.From << ", " << BI.To << ", " << BI.TakenCount
+                   << ", " << BI.MispredCount << "}" << "\n";
+  }
+
   // Parse and check SPE brstack as LBR.
   void parseAndCheckBrstackEvents(
       uint64_t PID, const std::vector<MockBranchInfo> &ExpectedSamples) {
@@ -91,25 +110,18 @@ struct PerfSpeEventsTestHelper : public testing::Test {
     DA.parseBranchEvents();
 
     EXPECT_EQ(DA.BranchLBRs.size(), ExpectedSamples.size());
-    if (DA.BranchLBRs.size() != ExpectedSamples.size()) {
-      // Simple export where they differ
-      llvm::errs() << "BranchLBRs items: \n";
-      for (const auto &AggrLBR : DA.BranchLBRs)
-        llvm::errs() << "{" << AggrLBR.first.From << ", " << AggrLBR.first.To
-                     << ", " << AggrLBR.second.TakenCount << ", "
-                     << AggrLBR.second.MispredCount << "}" << "\n";
-
-      llvm::errs() << "Expected items: \n";
-      for (const MockBranchInfo &BI : ExpectedSamples)
-        llvm::errs() << "{" << BI.From << ", " << BI.To << ", " << BI.TakenCount
-                     << ", " << BI.MispredCount << "}" << "\n";
-    } else {
-      for (const MockBranchInfo &BI : ExpectedSamples) {
-        EXPECT_EQ(DA.BranchLBRs.at(Trace(BI.From, BI.To)).MispredCount,
-                  BI.MispredCount);
-        EXPECT_EQ(DA.BranchLBRs.at(Trace(BI.From, BI.To)).TakenCount,
-                  BI.TakenCount);
-      }
+    if (DA.BranchLBRs.size() != ExpectedSamples.size())
+      exportBrStackEventMismatch(DA.BranchLBRs, ExpectedSamples);
+
+    for (const MockBranchInfo &BI : ExpectedSamples) {
+      /// Check whether the key exists, throws 'std::out_of_range'
+      /// if the container does not have an element with the specified key.
+      EXPECT_NO_THROW(DA.BranchLBRs.at(Trace(BI.From, BI.To)));
+
+      EXPECT_EQ(DA.BranchLBRs.at(Trace(BI.From, BI.To)).MispredCount,
+                BI.MispredCount);
+      EXPECT_EQ(DA.BranchLBRs.at(Trace(BI.From, BI.To)).TakenCount,
+                BI.TakenCount);
     }
   }
 };

>From 3ddcc988cb877dfc111151c66d34c643db832991 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Mon, 16 Jun 2025 16:25:10 +0200
Subject: [PATCH 15/16] updated comments

---
 bolt/unittests/Profile/PerfSpeEvents.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 89e8093a26f40..f1215dc98fa85 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -81,11 +81,10 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   std::unique_ptr<ObjectFile> ObjFile;
   std::unique_ptr<BinaryContext> BC;
 
-  // Helper function to export lists to show the mismatch
-  void exportBrStackEventMismatch(
+  /// Helper function to export lists to show the mismatch.
+  void reportBrStackEventMismatch(
       const std::unordered_map<Trace, TakenBranchInfo, TraceHash> &BranchLBRs,
       const std::vector<MockBranchInfo> &ExpectedSamples) {
-    // Simple export where they differ
     llvm::errs() << "BranchLBRs items: \n";
     for (const auto &AggrLBR : BranchLBRs)
       llvm::errs() << "{" << AggrLBR.first.From << ", " << AggrLBR.first.To
@@ -98,7 +97,7 @@ struct PerfSpeEventsTestHelper : public testing::Test {
                    << ", " << BI.MispredCount << "}" << "\n";
   }
 
-  // Parse and check SPE brstack as LBR.
+  /// Parse and check SPE brstack as LBR.
   void parseAndCheckBrstackEvents(
       uint64_t PID, const std::vector<MockBranchInfo> &ExpectedSamples) {
     DataAggregator DA("<pseudo input>");
@@ -111,11 +110,10 @@ struct PerfSpeEventsTestHelper : public testing::Test {
 
     EXPECT_EQ(DA.BranchLBRs.size(), ExpectedSamples.size());
     if (DA.BranchLBRs.size() != ExpectedSamples.size())
-      exportBrStackEventMismatch(DA.BranchLBRs, ExpectedSamples);
+      reportBrStackEventMismatch(DA.BranchLBRs, ExpectedSamples);
 
     for (const MockBranchInfo &BI : ExpectedSamples) {
-      /// Check whether the key exists, throws 'std::out_of_range'
-      /// if the container does not have an element with the specified key.
+      // Check that each key exists and that it matches.
       EXPECT_NO_THROW(DA.BranchLBRs.at(Trace(BI.From, BI.To)));
 
       EXPECT_EQ(DA.BranchLBRs.at(Trace(BI.From, BI.To)).MispredCount,

>From 7d3baeaf83f797eb5cddf830bccfd6d6c203c324 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Mon, 16 Jun 2025 16:47:06 +0200
Subject: [PATCH 16/16] Address reviewers 3

---
 bolt/lib/Profile/DataAggregator.cpp      | 2 +-
 bolt/unittests/Profile/PerfSpeEvents.cpp | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index be557154f7d00..2ad16c191d021 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1552,7 +1552,7 @@ void DataAggregator::printBranchStacksDiagnostics(
 
 std::error_code DataAggregator::parseBranchEvents() {
   std::string BranchEventTypeStr =
-      !opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
+      opts::ArmSPE ? "SPE branch events in LBR-format" : "branch events";
   outs() << "PERF2BOLT: parse " << BranchEventTypeStr << "...\n";
   NamedRegionTimer T("parseBranch", "Parsing " + BranchEventTypeStr,
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index f1215dc98fa85..d19548e9094b8 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -29,8 +29,7 @@ extern cl::opt<bool> ArmSPE;
 namespace llvm {
 namespace bolt {
 
-/// Perform checks on perf SPE branch events combined with other SPE or perf
-/// events.
+/// Perform checks on perf SPE branch events.
 struct PerfSpeEventsTestHelper : public testing::Test {
   void SetUp() override {
     initalizeLLVM();



More information about the llvm-commits mailing list