[llvm] Add initial support for SPE brstack format (PR #129231)

Ádám Kallai via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 10 08:57:49 PDT 2025


https://github.com/kaadam updated https://github.com/llvm/llvm-project/pull/129231

>From aac6b1dc460e63c0d661e4ea698277236829b88f Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <paschalis.mpeis at arm.com>
Date: Fri, 20 Dec 2024 14:19:01 +0000
Subject: [PATCH 01/10] [BOLT][AArch64] Introduce SPE mode in BasicAggregation

BOLT gains the ability to process branch target information generated by
Arm SPE data, using the `BasicAggregation` format.

Example usage is:
```bash
perf2bolt -p perf.data -o perf.boltdata --nl --spe BINARY
```

New branch data and compatibility:
---
SPE branch entries in perf data contain a branch pair (`IP` -> `ADDR`)
for the source and destination branches. DataAggregator processes those
by creating two basic samples. Any other event types will have the `ADDR`
field set to `0x0`. For those, a single sample will be created. Such
events can be either SPE or non-SPE, like `l1d-access` and `cycles`
respectively.

The format of the input perf entries is:
```
PID   EVENT-TYPE   ADDR   IP
```

When in SPE mode and:
- host is not `AArch64`, BOLT will exit with a relevant message
- `ADDR` field is unavailable, BOLT will exit with a relevant message
- no branch pairs were recorded, BOLT will present a warning

Examples of generating profiling data for the SPE mode:
---
Profiles can be captured with perf on AArch64 machines with SPE enabled.
They can be combined with other events, SPE or not.

Capture only SPE branch data events:
```bash
perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
```

Capture any SPE events:
```bash
perf record -e 'arm_spe_0//u' -- BINARY
```

Capture any SPE events and cycles:
```bash
perf record -e 'arm_spe_0//u' -e cycles:u -- BINARY
```

Use more filters, jitter, and an explicit sample count to control overhead and quality:
```bash
perf record -e 'arm_spe_0/branch_filter=1,load_filter=0,store_filter=0,jitter=1/u' -c 10007 -- BINARY
```
---
 bolt/include/bolt/Profile/DataAggregator.h    |  14 ++
 bolt/lib/Profile/DataAggregator.cpp           | 138 +++++++++++++-
 .../test/perf2bolt/AArch64/perf2bolt-spe.test |  14 ++
 bolt/test/perf2bolt/X86/perf2bolt-spe.test    |   9 +
 bolt/tools/driver/llvm-bolt.cpp               |   9 +
 bolt/unittests/Profile/CMakeLists.txt         |  14 ++
 bolt/unittests/Profile/PerfSpeEvents.cpp      | 173 ++++++++++++++++++
 7 files changed, 363 insertions(+), 8 deletions(-)
 create mode 100644 bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
 create mode 100644 bolt/test/perf2bolt/X86/perf2bolt-spe.test
 create mode 100644 bolt/unittests/Profile/PerfSpeEvents.cpp

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 3f07a6dc03a4f..e2f72a579cb75 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -85,6 +85,8 @@ class DataAggregator : public DataReader {
   };
   friend raw_ostream &operator<<(raw_ostream &OS, const LBREntry &);
 
+  friend struct PerfSpeEventsTestHelper;
+
   struct PerfBranchSample {
     SmallVector<LBREntry, 32> LBR;
   };
@@ -286,6 +288,15 @@ class DataAggregator : public DataReader {
   /// and a PC
   ErrorOr<PerfBasicSample> parseBasicSample();
 
+  /// Parse an Arm SPE entry into the non-lbr format by generating two basic
+  /// samples. The format of an input SPE entry is:
+  /// ```
+  /// PID   EVENT-TYPE   ADDR   IP
+  /// ```
+  /// SPE branch events will have 'ADDR' set to a branch target address while
+  /// other perf or SPE events will have it set to zero.
+  ErrorOr<std::pair<PerfBasicSample,PerfBasicSample>> parseSpeAsBasicSamples();
+
   /// Parse a single perf sample containing a PID associated with an IP and
   /// address.
   ErrorOr<PerfMemSample> parseMemSample();
@@ -332,6 +343,9 @@ class DataAggregator : public DataReader {
   /// Process non-LBR events.
   void processBasicEvents();
 
+  /// Parse Arm SPE events into the non-LBR format.
+  std::error_code parseSpeAsBasicEvents();
+
   /// Parse the full output generated by perf script to report memory events.
   std::error_code parseMemEvents();
 
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 4022212bcf1b6..ffc3e5a6fa112 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -49,6 +49,13 @@ static cl::opt<bool>
                      cl::desc("aggregate basic samples (without LBR info)"),
                      cl::cat(AggregatorCategory));
 
+cl::opt<bool> ArmSPE(
+    "spe",
+    cl::desc(
+        "Enable Arm SPE mode. Used in conjuction with no-lbr mode, ie `--spe "
+        "--nl`"),
+    cl::cat(AggregatorCategory));
+
 static cl::opt<std::string>
     ITraceAggregation("itrace",
                       cl::desc("Generate LBR info with perf itrace argument"),
@@ -175,11 +182,19 @@ void DataAggregator::start() {
 
   findPerfExecutable();
 
-  if (opts::BasicAggregation) {
-    launchPerfProcess("events without LBR",
-                      MainEventsPPI,
+  if (opts::ArmSPE) {
+    if (!opts::BasicAggregation) {
+      errs() << "PERF2BOLT-ERROR: Arm SPE mode is combined only with "
+                "BasicAggregation.\n";
+      exit(1);
+    }
+    launchPerfProcess("branch events with SPE", MainEventsPPI,
+                      "script -F pid,event,ip,addr --itrace=i1i",
+                      /*Wait = */ false);
+  } else if (opts::BasicAggregation) {
+    launchPerfProcess("events without LBR", MainEventsPPI,
                       "script -F pid,event,ip",
-                      /*Wait = */false);
+                      /*Wait = */ false);
   } else if (!opts::ITraceAggregation.empty()) {
     std::string ItracePerfScriptArgs = llvm::formatv(
         "script -F pid,brstack --itrace={0}", opts::ITraceAggregation);
@@ -448,14 +463,20 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   this->BC = &BC;
 
-  auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
+  const Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
+                     "Cannot print 'addr' field.");
+
+  auto ErrorCallback = [&NoData](int ReturnCode, StringRef ErrBuf) {
+    if (opts::ArmSPE && NoData.match(ErrBuf)) {
+      errs() << "PERF2BOLT-ERROR: perf data are incompatible for Arm SPE mode "
+                "consumption. ADDR attribute is unset.\n";
+      exit(1);
+    }
     errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
     exit(1);
   };
 
   auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
-    Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
-                 "Cannot print 'addr' field.");
     if (!NoData.match(ErrBuf))
       ErrorCallback(ReturnCode, ErrBuf);
   };
@@ -501,7 +522,8 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   filterBinaryMMapInfo();
   prepareToParse("events", MainEventsPPI, ErrorCallback);
 
-  if ((!opts::BasicAggregation && parseBranchEvents()) ||
+  if (((!opts::BasicAggregation && !opts::ArmSPE) && parseBranchEvents()) ||
+      (opts::BasicAggregation && opts::ArmSPE && parseSpeAsBasicEvents()) ||
       (opts::BasicAggregation && parseBasicEvents()))
     errs() << "PERF2BOLT: failed to parse samples\n";
 
@@ -1128,6 +1150,66 @@ ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
   return PerfBasicSample{Event.get(), Address};
 }
 
+ErrorOr<
+    std::pair<DataAggregator::PerfBasicSample, DataAggregator::PerfBasicSample>>
+DataAggregator::parseSpeAsBasicSamples() {
+  while (checkAndConsumeFS()) {
+  }
+
+  ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
+  if (std::error_code EC = PIDRes.getError())
+    return EC;
+
+  constexpr PerfBasicSample EmptySample = PerfBasicSample{StringRef(), 0};
+  auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
+  if (MMapInfoIter == BinaryMMapInfo.end()) {
+    consumeRestOfLine();
+    return std::make_pair(EmptySample, EmptySample);
+  }
+
+  while (checkAndConsumeFS()) {
+  }
+
+  ErrorOr<StringRef> Event = parseString(FieldSeparator);
+  if (std::error_code EC = Event.getError())
+    return EC;
+
+  while (checkAndConsumeFS()) {
+  }
+
+  ErrorOr<uint64_t> AddrResTo = parseHexField(FieldSeparator);
+  if (std::error_code EC = AddrResTo.getError())
+    return EC;
+  consumeAllRemainingFS();
+
+  ErrorOr<uint64_t> AddrResFrom = parseHexField(FieldSeparator, true);
+  if (std::error_code EC = AddrResFrom.getError())
+    return EC;
+
+  if (!checkAndConsumeNewLine()) {
+    reportError("expected end of line");
+    return make_error_code(llvm::errc::io_error);
+  }
+
+  auto genBasicSample = [&](uint64_t Address) {
+    // When fed with non SPE branch events the target address will be null.
+    // This is expected and ignored.
+    if (Address == 0x0)
+      return EmptySample;
+
+    if (!BC->HasFixedLoadAddress)
+      adjustAddress(Address, MMapInfoIter->second);
+    return PerfBasicSample{Event.get(), Address};
+  };
+
+  // Show more meaningful event names on boltdata.
+  if (Event->str() == "instructions:")
+    Event = *AddrResTo != 0x0 ? "branch-spe:" : "instruction-spe:";
+
+  return std::make_pair(genBasicSample(*AddrResFrom),
+                        genBasicSample(*AddrResTo));
+}
+
 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
   PerfMemSample Res{0, 0};
 
@@ -1627,6 +1709,46 @@ std::error_code DataAggregator::parseBasicEvents() {
   return std::error_code();
 }
 
+std::error_code DataAggregator::parseSpeAsBasicEvents() {
+  outs() << "PERF2BOLT: parsing SPE data as basic events (no LBR)...\n";
+  NamedRegionTimer T("parseSPEBasic", "Parsing SPE as basic events",
+                     TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
+  uint64_t NumSpeBranchSamples = 0;
+
+  // Convert entries to one or two basic samples, depending on whether there is
+  // branch target information.
+  while (hasData()) {
+    auto SamplePair = parseSpeAsBasicSamples();
+    if (std::error_code EC = SamplePair.getError())
+      return EC;
+
+    auto registerSample = [this](const PerfBasicSample *Sample) {
+      if (!Sample->PC)
+        return;
+
+      if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
+        BF->setHasProfileAvailable();
+
+      ++BasicSamples[Sample->PC];
+      EventNames.insert(Sample->EventName);
+    };
+
+    if (SamplePair->first.PC != 0x0 && SamplePair->second.PC != 0x0)
+      ++NumSpeBranchSamples;
+
+    registerSample(&SamplePair->first);
+    registerSample(&SamplePair->second);
+  }
+
+  if (NumSpeBranchSamples == 0)
+    errs() << "PERF2BOLT-WARNING: no SPE branches found\n";
+  else
+    outs() << "PERF2BOLT: found " << NumSpeBranchSamples
+           << " SPE branch sample pairs.\n";
+
+  return std::error_code();
+}
+
 void DataAggregator::processBasicEvents() {
   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
new file mode 100644
index 0000000000000..d7cea7ff769b8
--- /dev/null
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -0,0 +1,14 @@
+## Check that Arm SPE mode is available on AArch64 with BasicAggregation.
+
+REQUIRES: system-linux,perf,target=aarch64{{.*}}
+
+RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
+RUN: touch %t.empty.perf.data
+RUN: perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
+
+CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job
+
+RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe
+RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-LBR
+
+CHECK-SPE-LBR: PERF2BOLT-ERROR: Arm SPE mode is combined only with BasicAggregation.
diff --git a/bolt/test/perf2bolt/X86/perf2bolt-spe.test b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
new file mode 100644
index 0000000000000..f31c17f411137
--- /dev/null
+++ b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
@@ -0,0 +1,9 @@
+## Check that Arm SPE mode is unavailable on X86.
+
+REQUIRES: system-linux,x86_64-linux
+
+RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
+RUN: touch %t.empty.perf.data
+RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s
+
+CHECK: BOLT-ERROR: -spe is available only on AArch64.
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index b9836c2397b6b..66ccc8d0b65f4 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -51,6 +51,8 @@ static cl::opt<std::string> InputFilename(cl::Positional,
                                           cl::Required, cl::cat(BoltCategory),
                                           cl::sub(cl::SubCommand::getAll()));
 
+extern cl::opt<bool> ArmSPE;
+
 static cl::opt<std::string>
 InputDataFilename("data",
   cl::desc("<data file>"),
@@ -237,6 +239,13 @@ int main(int argc, char **argv) {
       if (Error E = RIOrErr.takeError())
         report_error(opts::InputFilename, std::move(E));
       RewriteInstance &RI = *RIOrErr.get();
+
+      if (opts::AggregateOnly && !RI.getBinaryContext().isAArch64() &&
+          opts::ArmSPE == 1) {
+        errs() << "BOLT-ERROR: -spe is available only on AArch64.\n";
+        exit(1);
+      }
+
       if (!opts::PerfData.empty()) {
         if (!opts::AggregateOnly) {
           errs() << ToolName
diff --git a/bolt/unittests/Profile/CMakeLists.txt b/bolt/unittests/Profile/CMakeLists.txt
index e0aa0926b49c0..ce01c6c4b949e 100644
--- a/bolt/unittests/Profile/CMakeLists.txt
+++ b/bolt/unittests/Profile/CMakeLists.txt
@@ -1,11 +1,25 @@
+set(LLVM_LINK_COMPONENTS
+  DebugInfoDWARF
+  Object
+  ${LLVM_TARGETS_TO_BUILD}
+  )
+
 add_bolt_unittest(ProfileTests
   DataAggregator.cpp
+  PerfSpeEvents.cpp
 
   DISABLE_LLVM_LINK_LLVM_DYLIB
   )
 
 target_link_libraries(ProfileTests
   PRIVATE
+  LLVMBOLTCore
   LLVMBOLTProfile
+  LLVMTargetParser
+  LLVMTestingSupport
   )
 
+foreach (tgt ${BOLT_TARGETS_TO_BUILD})
+  string(TOUPPER "${tgt}" upper)
+  target_compile_definitions(ProfileTests PRIVATE "${upper}_AVAILABLE")
+endforeach()
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
new file mode 100644
index 0000000000000..807a3bb1e07f4
--- /dev/null
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -0,0 +1,173 @@
+//===- bolt/unittests/Profile/PerfSpeEvents.cpp ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef AARCH64_AVAILABLE
+
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Profile/DataAggregator.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::bolt;
+using namespace llvm::object;
+using namespace llvm::ELF;
+
+namespace opts {
+extern cl::opt<std::string> ReadPerfEvents;
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+/// Perform checks on perf SPE branch events combined with other SPE or perf
+/// events.
+struct PerfSpeEventsTestHelper : public testing::Test {
+  void SetUp() override {
+    initalizeLLVM();
+    prepareElf();
+    initializeBOLT();
+  }
+
+protected:
+  void initalizeLLVM() {
+    llvm::InitializeAllTargetInfos();
+    llvm::InitializeAllTargetMCs();
+    llvm::InitializeAllAsmParsers();
+    llvm::InitializeAllDisassemblers();
+    llvm::InitializeAllTargets();
+    llvm::InitializeAllAsmPrinters();
+  }
+
+  void prepareElf() {
+    memcpy(ElfBuf, "\177ELF", 4);
+    ELF64LE::Ehdr *EHdr = reinterpret_cast<typename ELF64LE::Ehdr *>(ElfBuf);
+    EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64;
+    EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB;
+    EHdr->e_machine = llvm::ELF::EM_AARCH64;
+    MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF");
+    ObjFile = cantFail(ObjectFile::createObjectFile(Source));
+  }
+
+  void initializeBOLT() {
+    Relocation::Arch = ObjFile->makeTriple().getArch();
+    BC = cantFail(BinaryContext::createBinaryContext(
+        ObjFile->makeTriple(), std::make_shared<orc::SymbolStringPool>(),
+        ObjFile->getFileName(), nullptr, /*IsPIC*/ false,
+        DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()}));
+    ASSERT_FALSE(!BC);
+  }
+
+  char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {};
+  std::unique_ptr<ObjectFile> ObjFile;
+  std::unique_ptr<BinaryContext> BC;
+
+  /// Return true when the expected \p SampleSize profile data are generated and
+  /// contain all the \p ExpectedEventNames.
+  bool checkEvents(uint64_t PID, size_t SampleSize,
+                   const StringSet<> &ExpectedEventNames) {
+    DataAggregator DA("<pseudo input>");
+    DA.ParsingBuf = opts::ReadPerfEvents;
+    DA.BC = BC.get();
+    DataAggregator::MMapInfo MMap;
+    DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
+
+    DA.parseSpeAsBasicEvents();
+
+    for (auto &EE : ExpectedEventNames)
+      if (!DA.EventNames.contains(EE.first()))
+        return false;
+
+    return SampleSize == DA.BasicSamples.size();
+  }
+};
+
+} // namespace bolt
+} // namespace llvm
+
+// Check that DataAggregator can parseSpeAsBasicEvents for branch events when
+// combined with other event types.
+
+TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
+  // Check perf input with SPE branch events.
+  // Example collection command:
+  // ```
+  // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+  // ```
+
+  opts::ReadPerfEvents =
+      "1234          instructions:              a002    a001\n"
+      "1234          instructions:              b002    b001\n"
+      "1234          instructions:              c002    c001\n"
+      "1234          instructions:              d002    d001\n"
+      "1234          instructions:              e002    e001\n";
+
+  EXPECT_TRUE(checkEvents(1234, 10, {"branch-spe:"}));
+}
+
+TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
+  // Check perf input with SPE branch events and cycles.
+  // Example collection command:
+  // ```
+  // perf record -e cycles:u -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+  // ```
+
+  opts::ReadPerfEvents =
+      "1234          instructions:              a002    a001\n"
+      "1234              cycles:u:                 0    b001\n"
+      "1234              cycles:u:                 0    c001\n"
+      "1234          instructions:              d002    d001\n"
+      "1234          instructions:              e002    e001\n";
+
+  EXPECT_TRUE(checkEvents(1234, 8, {"branch-spe:", "cycles:u:"}));
+}
+
+TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
+  // Check perf input with any SPE event type and cycles.
+  // Example collection command:
+  // ```
+  // perf record -e cycles:u -e 'arm_spe_0//u' -- BINARY
+  // ```
+
+  opts::ReadPerfEvents =
+      "1234              cycles:u:                0     a001\n"
+      "1234              cycles:u:                0     b001\n"
+      "1234          instructions:                0     c001\n"
+      "1234          instructions:                0     d001\n"
+      "1234          instructions:              e002    e001\n";
+
+  EXPECT_TRUE(
+      checkEvents(1234, 6, {"cycles:u:", "instruction-spe:", "branch-spe:"}));
+}
+
+TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
+  // Check perf input that has no SPE branch pairs recorded.
+  // Example collection command:
+  // ```
+  // perf record -e cycles:u -e 'arm_spe_0/load_filter=1,branch_filter=0/u' --
+  // BINARY
+  // ```
+
+  testing::internal::CaptureStderr();
+  opts::ReadPerfEvents =
+      "1234          instructions:                 0    a001\n"
+      "1234              cycles:u:                 0    b001\n"
+      "1234          instructions:                 0    c001\n"
+      "1234              cycles:u:                 0    d001\n"
+      "1234          instructions:                 0    e001\n";
+
+  EXPECT_TRUE(checkEvents(1234, 5, {"instruction-spe:", "cycles:u:"}));
+
+  std::string Stderr = testing::internal::GetCapturedStderr();
+  EXPECT_EQ(Stderr, "PERF2BOLT-WARNING: no SPE branches found\n");
+}
+
+#endif

>From 4d7173f85cb94ffa3f8dad0d84f9cd81fa2548c5 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Fri, 20 Dec 2024 15:13:40 +0000
Subject: [PATCH 02/10] clang-format fix

---
 bolt/include/bolt/Profile/DataAggregator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index e2f72a579cb75..d5110eac09ac2 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -295,7 +295,7 @@ class DataAggregator : public DataReader {
   /// ```
   /// SPE branch events will have 'ADDR' set to a branch target address while
   /// other perf or SPE events will have it set to zero.
-  ErrorOr<std::pair<PerfBasicSample,PerfBasicSample>> parseSpeAsBasicSamples();
+  ErrorOr<std::pair<PerfBasicSample, PerfBasicSample>> parseSpeAsBasicSamples();
 
   /// Parse a single perf sample containing a PID associated with an IP and
   /// address.

>From f8b11bea11925c629ff6eacbfcf762aa33c55792 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Wed, 15 Jan 2025 15:11:12 +0000
Subject: [PATCH 03/10] Addressing reviewers (1)

---
 bolt/include/bolt/Utils/CommandLineOpts.h |  1 +
 bolt/lib/Profile/DataAggregator.cpp       |  4 +++-
 bolt/tools/driver/llvm-bolt.cpp           |  4 +---
 bolt/unittests/Profile/PerfSpeEvents.cpp  | 10 +++++-----
 4 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index 4acce5a3e8320..a75b6bf720ec4 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -48,6 +48,7 @@ extern llvm::cl::OptionCategory BinaryAnalysisCategory;
 extern llvm::cl::opt<unsigned> AlignText;
 extern llvm::cl::opt<unsigned> AlignFunctions;
 extern llvm::cl::opt<bool> AggregateOnly;
+extern llvm::cl::opt<bool> ArmSPE;
 extern llvm::cl::opt<unsigned> BucketsPerLine;
 extern llvm::cl::opt<bool> CompactCodeModel;
 extern llvm::cl::opt<bool> DiffOnly;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index ffc3e5a6fa112..8cd207ece1ea6 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1180,6 +1180,7 @@ DataAggregator::parseSpeAsBasicSamples() {
   ErrorOr<uint64_t> AddrResTo = parseHexField(FieldSeparator);
   if (std::error_code EC = AddrResTo.getError())
     return EC;
+
   consumeAllRemainingFS();
 
   ErrorOr<uint64_t> AddrResFrom = parseHexField(FieldSeparator, true);
@@ -1199,12 +1200,13 @@ DataAggregator::parseSpeAsBasicSamples() {
 
     if (!BC->HasFixedLoadAddress)
       adjustAddress(Address, MMapInfoIter->second);
+
     return PerfBasicSample{Event.get(), Address};
   };
 
   // Show more meaningful event names on boltdata.
   if (Event->str() == "instructions:")
-    Event = *AddrResTo != 0x0 ? "branch-spe:" : "instruction-spe:";
+    Event = *AddrResTo != 0x0 ? "branches-spe:" : "instructions-spe:";
 
   return std::make_pair(genBasicSample(*AddrResFrom),
                         genBasicSample(*AddrResTo));
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index 66ccc8d0b65f4..2e91118c00a83 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -51,8 +51,6 @@ static cl::opt<std::string> InputFilename(cl::Positional,
                                           cl::Required, cl::cat(BoltCategory),
                                           cl::sub(cl::SubCommand::getAll()));
 
-extern cl::opt<bool> ArmSPE;
-
 static cl::opt<std::string>
 InputDataFilename("data",
   cl::desc("<data file>"),
@@ -241,7 +239,7 @@ int main(int argc, char **argv) {
       RewriteInstance &RI = *RIOrErr.get();
 
       if (opts::AggregateOnly && !RI.getBinaryContext().isAArch64() &&
-          opts::ArmSPE == 1) {
+          opts::ArmSPE) {
         errs() << "BOLT-ERROR: -spe is available only on AArch64.\n";
         exit(1);
       }
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 807a3bb1e07f4..e52393b516fa3 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -110,7 +110,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
       "1234          instructions:              d002    d001\n"
       "1234          instructions:              e002    e001\n";
 
-  EXPECT_TRUE(checkEvents(1234, 10, {"branch-spe:"}));
+  EXPECT_TRUE(checkEvents(1234, 10, {"branches-spe:"}));
 }
 
 TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
@@ -127,7 +127,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
       "1234          instructions:              d002    d001\n"
       "1234          instructions:              e002    e001\n";
 
-  EXPECT_TRUE(checkEvents(1234, 8, {"branch-spe:", "cycles:u:"}));
+  EXPECT_TRUE(checkEvents(1234, 8, {"branches-spe:", "cycles:u:"}));
 }
 
 TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
@@ -144,8 +144,8 @@ TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
       "1234          instructions:                0     d001\n"
       "1234          instructions:              e002    e001\n";
 
-  EXPECT_TRUE(
-      checkEvents(1234, 6, {"cycles:u:", "instruction-spe:", "branch-spe:"}));
+  EXPECT_TRUE(checkEvents(1234, 6,
+                          {"cycles:u:", "instructions-spe:", "branches-spe:"}));
 }
 
 TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
@@ -164,7 +164,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
       "1234              cycles:u:                 0    d001\n"
       "1234          instructions:                 0    e001\n";
 
-  EXPECT_TRUE(checkEvents(1234, 5, {"instruction-spe:", "cycles:u:"}));
+  EXPECT_TRUE(checkEvents(1234, 5, {"instructions-spe:", "cycles:u:"}));
 
   std::string Stderr = testing::internal::GetCapturedStderr();
   EXPECT_EQ(Stderr, "PERF2BOLT-WARNING: no SPE branches found\n");

>From 67867c2476256e462e8c3aa9feef373beb3d2c2f Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Fri, 17 Jan 2025 13:42:19 +0000
Subject: [PATCH 04/10] Addressing reviewers (2)

---
 bolt/test/perf2bolt/X86/perf2bolt-spe.test | 2 +-
 bolt/tools/driver/llvm-bolt.cpp            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/test/perf2bolt/X86/perf2bolt-spe.test b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
index f31c17f411137..ec24c44c4d13d 100644
--- a/bolt/test/perf2bolt/X86/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
@@ -6,4 +6,4 @@ RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.e
 RUN: touch %t.empty.perf.data
 RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s
 
-CHECK: BOLT-ERROR: -spe is available only on AArch64.
+CHECK: perf2bolt: -spe is available only on AArch64.
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index 2e91118c00a83..cf1b31f8c0c66 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -240,7 +240,7 @@ int main(int argc, char **argv) {
 
       if (opts::AggregateOnly && !RI.getBinaryContext().isAArch64() &&
           opts::ArmSPE) {
-        errs() << "BOLT-ERROR: -spe is available only on AArch64.\n";
+        errs() << ToolName << ": -spe is available only on AArch64.\n";
         exit(1);
       }
 

>From 699cbf65a3da1572592277b5d47ee93ccee5d0c3 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Wed, 19 Feb 2025 17:00:47 +0100
Subject: [PATCH 05/10] Add initial support for SPE brstack

Perf will be able to report SPE branch events in a format similar to the one
it uses for LBR brstack.
Therefore, we can reuse the existing LBR parsing process for SPE as well.

Example of the SPE brstack input format:
```bash
perf script -i perf.data -F pid,brstack --itrace=bl
```
```
---
PID    FROM         TO           PREDICTED
---
16984  0x72e342e5f4/0x72e36192d0/M/-/-/11/RET/-
16984  0x72e7b8b3b4/0x72e7b8b3b8/PN/-/-/11/COND/-
16984  0x72e7b92b48/0x72e7b92b4c/PN/-/-/8/COND/-
16984  0x72eacc6b7c/0x760cc94b00/P/-/-/9/RET/-
16984  0x72e3f210fc/0x72e3f21068/P/-/-/4//-
16984  0x72e39b8c5c/0x72e3627b24/P/-/-/4//-
16984  0x72e7b89d20/0x72e7b92bbc/P/-/-/4/RET/-
```
The SPE brstack mispredicted flag may be two characters long: 'PN' or 'MN',
where 'N' means the branch was marked as NOT-TAKEN. This flag only applies to
conditional instructions (conditional branch or compare-and-branch); it
indicates that the instruction failed its condition code check.

Perf with 'brstack' support for SPE is available here:
```
https://github.com/Leo-Yan/linux/tree/perf_arm_spe_branch_flags_v2
```

Example of usage with SPE perf data:
```bash
perf2bolt -p perf.data -o perf.fdata --spe BINARY
```

Capture standard SPE branch events with perf:
```bash
perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
```

A unit test is also added to check the parsing of the SPE brstack format.
---
 bolt/lib/Profile/DataAggregator.cpp           | 70 ++++++++++++-------
 .../test/perf2bolt/AArch64/perf2bolt-spe.test | 15 ++--
 bolt/unittests/Profile/PerfSpeEvents.cpp      | 69 ++++++++++++++++++
 3 files changed, 122 insertions(+), 32 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 8cd207ece1ea6..3f8af58858a6b 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -49,12 +49,10 @@ static cl::opt<bool>
                      cl::desc("aggregate basic samples (without LBR info)"),
                      cl::cat(AggregatorCategory));
 
-cl::opt<bool> ArmSPE(
-    "spe",
-    cl::desc(
-        "Enable Arm SPE mode. Used in conjuction with no-lbr mode, ie `--spe "
-        "--nl`"),
-    cl::cat(AggregatorCategory));
+cl::opt<bool> ArmSPE("spe",
+                     cl::desc("Enable Arm SPE mode. Can combine with `--nl` "
+                              "to use in no-lbr mode"),
+                     cl::cat(AggregatorCategory));
 
 static cl::opt<std::string>
     ITraceAggregation("itrace",
@@ -184,13 +182,16 @@ void DataAggregator::start() {
 
   if (opts::ArmSPE) {
     if (!opts::BasicAggregation) {
-      errs() << "PERF2BOLT-ERROR: Arm SPE mode is combined only with "
-                "BasicAggregation.\n";
-      exit(1);
+      // pid    from_ip      to_ip        predicted/missed not-taken?
+      // 12345  0x123/0x456/PN/-/-/8/RET/-
+      launchPerfProcess("SPE brstack events", MainEventsPPI,
+                        "script -F pid,brstack --itrace=bl",
+                        /*Wait = */ false);
+    } else {
+      launchPerfProcess("SPE branch events (non-lbr)", MainEventsPPI,
+                        "script -F pid,event,ip,addr --itrace=i1i",
+                        /*Wait = */ false);
     }
-    launchPerfProcess("branch events with SPE", MainEventsPPI,
-                      "script -F pid,event,ip,addr --itrace=i1i",
-                      /*Wait = */ false);
   } else if (opts::BasicAggregation) {
     launchPerfProcess("events without LBR", MainEventsPPI,
                       "script -F pid,event,ip",
@@ -522,7 +523,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   filterBinaryMMapInfo();
   prepareToParse("events", MainEventsPPI, ErrorCallback);
 
-  if (((!opts::BasicAggregation && !opts::ArmSPE) && parseBranchEvents()) ||
+  if ((!opts::BasicAggregation && parseBranchEvents()) ||
       (opts::BasicAggregation && opts::ArmSPE && parseSpeAsBasicEvents()) ||
       (opts::BasicAggregation && parseBasicEvents()))
     errs() << "PERF2BOLT: failed to parse samples\n";
@@ -1024,9 +1025,20 @@ ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() {
   if (std::error_code EC = MispredStrRes.getError())
     return EC;
   StringRef MispredStr = MispredStrRes.get();
-  if (MispredStr.size() != 1 ||
-      (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
-    reportError("expected single char for mispred bit");
+  // SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
+  bool ValidStrSize = opts::ArmSPE ?
+    MispredStr.size() >= 1 && MispredStr.size() <= 2 : MispredStr.size() == 1;
+  bool SpeTakenBitErr =
+         (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
+  bool PredictionBitErr =
+         !ValidStrSize ||
+         (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
+  if (SpeTakenBitErr)
+    reportError("expected 'N' as SPE prediction bit for a not-taken branch");
+  if (PredictionBitErr)
+    reportError("expected 'P', 'M' or '-' char as a prediction bit");
+
+ if (SpeTakenBitErr || PredictionBitErr) {
     Diag << "Found: " << MispredStr << "\n";
     return make_error_code(llvm::errc::io_error);
   }
@@ -1602,9 +1614,11 @@ void DataAggregator::printBranchStacksDiagnostics(
 }
 
 std::error_code DataAggregator::parseBranchEvents() {
-  outs() << "PERF2BOLT: parse branch events...\n";
-  NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
-                     TimerGroupDesc, opts::TimeAggregator);
+  std::string BranchEventTypeStr =
+      opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
+  outs() << "PERF2BOLT: " << BranchEventTypeStr << "...\n";
+  NamedRegionTimer T("parseBranch", "Parsing " + BranchEventTypeStr,
+                     TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
   uint64_t NumEntries = 0;
   uint64_t NumSamples = 0;
@@ -1630,7 +1644,8 @@ std::error_code DataAggregator::parseBranchEvents() {
     }
 
     NumEntries += Sample.LBR.size();
-    if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
+    if (this->BC->isX86() && BAT && Sample.LBR.size() == 32 &&
+        !NeedsSkylakeFix) {
       errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
       NeedsSkylakeFix = true;
     }
@@ -1649,10 +1664,17 @@ std::error_code DataAggregator::parseBranchEvents() {
     if (NumSamples && NumSamplesNoLBR == NumSamples) {
       // Note: we don't know if perf2bolt is being used to parse memory samples
       // at this point. In this case, it is OK to parse zero LBRs.
-      errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
-                "LBR. Record profile with perf record -j any or run perf2bolt "
-                "in no-LBR mode with -nl (the performance improvement in -nl "
-                "mode may be limited)\n";
+      if (!opts::ArmSPE)
+        errs()
+            << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
+               "LBR. Record profile with perf record -j any or run perf2bolt "
+               "in no-LBR mode with -nl (the performance improvement in -nl "
+               "mode may be limited)\n";
+      else
+        errs()
+            << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
+               "SPE brstack entries. Record profile with:"
+               "perf record arm_spe_0/branch_filter=1/";
     } else {
       printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
     }
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
index d7cea7ff769b8..95b8e205331a1 100644
--- a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -1,14 +1,13 @@
-## Check that Arm SPE mode is available on AArch64 with BasicAggregation.
+## Check that Arm SPE mode is available on AArch64.
 
 REQUIRES: system-linux,perf,target=aarch64{{.*}}
 
-RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
-RUN: touch %t.empty.perf.data
-RUN: perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
+RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe 2> /dev/null
 
-CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job
+RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe --nl %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
 
-RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe
-RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-LBR
+RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-LBR
+
+CHECK-SPE-NO-LBR: PERF2BOLT: spawning perf job to read SPE branch events (non-lbr)
+CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE brstack events
 
-CHECK-SPE-LBR: PERF2BOLT-ERROR: Arm SPE mode is combined only with BasicAggregation.
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index e52393b516fa3..9209f75147781 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -23,6 +23,7 @@ using namespace llvm::ELF;
 
 namespace opts {
 extern cl::opt<std::string> ReadPerfEvents;
+extern cl::opt<bool> ArmSPE;
 } // namespace opts
 
 namespace llvm {
@@ -38,6 +39,8 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   }
 
 protected:
+  using LBREntry = DataAggregator::LBREntry;
+
   void initalizeLLVM() {
     llvm::InitializeAllTargetInfos();
     llvm::InitializeAllTargetMCs();
@@ -88,6 +91,45 @@ struct PerfSpeEventsTestHelper : public testing::Test {
 
     return SampleSize == DA.BasicSamples.size();
   }
+
+  /// Compare LBREntries
+  bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
+    return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
+           Lhs.Mispred == Rhs.Mispred;
+  }
+
+  /// Parse and check SPE brstack as LBR
+  void parseAndCheckBrstackEvents(
+      uint64_t PID,
+      const std::vector<SmallVector<LBREntry, 2>> &ExpectedSamples) {
+    int NumSamples = 0;
+
+    DataAggregator DA("<pseudo input>");
+    DA.ParsingBuf = opts::ReadPerfEvents;
+    DA.BC = BC.get();
+    DataAggregator::MMapInfo MMap;
+    DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
+
+    // Process buffer.
+    while (DA.hasData()) {
+      ErrorOr<DataAggregator::PerfBranchSample> SampleRes =
+          DA.parseBranchSample();
+      if (std::error_code EC = SampleRes.getError())
+        EXPECT_NE(EC, std::errc::no_such_process);
+
+      DataAggregator::PerfBranchSample &Sample = SampleRes.get();
+      EXPECT_EQ(Sample.LBR.size(), ExpectedSamples[NumSamples].size());
+
+      // Check the parsed LBREntries.
+      const auto *ActualIter = Sample.LBR.begin();
+      const auto *ExpectIter = ExpectedSamples[NumSamples].begin();
+      while (ActualIter != Sample.LBR.end() &&
+             ExpectIter != ExpectedSamples[NumSamples].end())
+        EXPECT_TRUE(checkLBREntry(*ActualIter++, *ExpectIter++));
+
+      ++NumSamples;
+    }
+  }
 };
 
 } // namespace bolt
@@ -113,6 +155,33 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
   EXPECT_TRUE(checkEvents(1234, 10, {"branches-spe:"}));
 }
 
+TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
+  // Check perf input with SPE branch events as brstack format.
+  // Example collection command:
+  // ```
+  // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+  // ```
+  // How Bolt extracts the branch events:
+  // ```
+  // perf script -F pid,brstack --itrace=bl
+  // ```
+
+  opts::ArmSPE = true;
+  opts::ReadPerfEvents = "  1234  0xa001/0xa002/PN/-/-/10/COND/-\n"
+                         "  1234  0xb001/0xb002/P/-/-/4/RET/-\n"
+                         "  1234  0xc001/0xc002/P/-/-/13/-/-\n"
+                         "  1234  0xd001/0xd002/M/-/-/7/RET/-\n"
+                         "  1234  0xe001/0xe002/P/-/-/14/RET/-\n"
+                         "  1234  0xf001/0xf002/MN/-/-/8/COND/-\n";
+
+  std::vector<SmallVector<LBREntry, 2>> ExpectedSamples = {
+      {{{0xa001, 0xa002, false}}}, {{{0xb001, 0xb002, false}}},
+      {{{0xc001, 0xc002, false}}}, {{{0xd001, 0xd002, true}}},
+      {{{0xe001, 0xe002, false}}}, {{{0xf001, 0xf002, true}}},
+  };
+  parseAndCheckBrstackEvents(1234, ExpectedSamples);
+}
+
 TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
   // Check perf input with SPE branch events and cycles.
   // Example collection command:

>From 92c60977ebd63154c454b825e166935bd68c6d33 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Thu, 10 Apr 2025 15:37:29 +0200
Subject: [PATCH 06/10] Fix format issue

---
 bolt/lib/Profile/DataAggregator.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 3f8af58858a6b..4bd499c2f8a8c 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1026,19 +1026,20 @@ ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() {
     return EC;
   StringRef MispredStr = MispredStrRes.get();
   // SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
-  bool ValidStrSize = opts::ArmSPE ?
-    MispredStr.size() >= 1 && MispredStr.size() <= 2 : MispredStr.size() == 1;
+  bool ValidStrSize = opts::ArmSPE
+                          ? MispredStr.size() >= 1 && MispredStr.size() <= 2
+                          : MispredStr.size() == 1;
   bool SpeTakenBitErr =
-         (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
+      (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
   bool PredictionBitErr =
-         !ValidStrSize ||
-         (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
+      !ValidStrSize ||
+      (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
   if (SpeTakenBitErr)
     reportError("expected 'N' as SPE prediction bit for a not-taken branch");
   if (PredictionBitErr)
     reportError("expected 'P', 'M' or '-' char as a prediction bit");
 
- if (SpeTakenBitErr || PredictionBitErr) {
+  if (SpeTakenBitErr || PredictionBitErr) {
     Diag << "Found: " << MispredStr << "\n";
     return make_error_code(llvm::errc::io_error);
   }

>From ffd2f26669e2250367c7140f1f1846fab286398d Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Tue, 15 Apr 2025 12:50:16 +0200
Subject: [PATCH 07/10] Fix typo

---
 bolt/lib/Profile/DataAggregator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 4bd499c2f8a8c..0e2bfda7811b6 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1616,7 +1616,7 @@ void DataAggregator::printBranchStacksDiagnostics(
 
 std::error_code DataAggregator::parseBranchEvents() {
   std::string BranchEventTypeStr =
-      opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
+      !opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
   outs() << "PERF2BOLT: " << BranchEventTypeStr << "...\n";
   NamedRegionTimer T("parseBranch", "Parsing " + BranchEventTypeStr,
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);

>From b526d2b2c3da970ce012d57267971a4d34596e17 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Thu, 29 May 2025 09:42:03 +0200
Subject: [PATCH 08/10] Removing dependency of the SPE BasicAggregation

The aim of this commit is to decouple the SPE BRStack and SPE
BasicAggregation approaches, based on the decision in issue #115333.

The BRStack change relies on the unit test logic that was introduced by
Paschalis Mpeis (ARM) in #120741. Since that logic is common to both
aggregation techniques, an essential part of it needs to be retained.

All tests relevant to BasicAggregation are removed.

Co-Authored-By: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
---
 bolt/include/bolt/Profile/DataAggregator.h    |  12 --
 bolt/lib/Profile/DataAggregator.cpp           | 131 ++----------------
 .../test/perf2bolt/AArch64/perf2bolt-spe.test |   3 -
 bolt/unittests/Profile/PerfSpeEvents.cpp      |  96 -------------
 4 files changed, 8 insertions(+), 234 deletions(-)

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index d5110eac09ac2..ce9a6630a4d2c 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -288,15 +288,6 @@ class DataAggregator : public DataReader {
   /// and a PC
   ErrorOr<PerfBasicSample> parseBasicSample();
 
-  /// Parse an Arm SPE entry into the non-lbr format by generating two basic
-  /// samples. The format of an input SPE entry is:
-  /// ```
-  /// PID   EVENT-TYPE   ADDR   IP
-  /// ```
-  /// SPE branch events will have 'ADDR' set to a branch target address while
-  /// other perf or SPE events will have it set to zero.
-  ErrorOr<std::pair<PerfBasicSample, PerfBasicSample>> parseSpeAsBasicSamples();
-
   /// Parse a single perf sample containing a PID associated with an IP and
   /// address.
   ErrorOr<PerfMemSample> parseMemSample();
@@ -343,9 +334,6 @@ class DataAggregator : public DataReader {
   /// Process non-LBR events.
   void processBasicEvents();
 
-  /// Parse Arm SPE events into the non-LBR format.
-  std::error_code parseSpeAsBasicEvents();
-
   /// Parse the full output generated by perf script to report memory events.
   std::error_code parseMemEvents();
 
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 0e2bfda7811b6..0e6ec7228ed06 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -181,17 +181,11 @@ void DataAggregator::start() {
   findPerfExecutable();
 
   if (opts::ArmSPE) {
-    if (!opts::BasicAggregation) {
-      // pid    from_ip      to_ip        predicted/missed not-taken?
-      // 12345  0x123/0x456/PN/-/-/8/RET/-
-      launchPerfProcess("SPE brstack events", MainEventsPPI,
-                        "script -F pid,brstack --itrace=bl",
-                        /*Wait = */ false);
-    } else {
-      launchPerfProcess("SPE branch events (non-lbr)", MainEventsPPI,
-                        "script -F pid,event,ip,addr --itrace=i1i",
-                        /*Wait = */ false);
-    }
+    // pid    from_ip      to_ip        predicted/missed not-taken?
+    // 12345  0x123/0x456/PN/-/-/8/RET/-
+    launchPerfProcess("SPE brstack events", MainEventsPPI,
+                      "script -F pid,brstack --itrace=bl",
+                      /*Wait = */ false);
   } else if (opts::BasicAggregation) {
     launchPerfProcess("events without LBR", MainEventsPPI,
                       "script -F pid,event,ip",
@@ -464,20 +458,14 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   this->BC = &BC;
 
-  const Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
-                     "Cannot print 'addr' field.");
-
-  auto ErrorCallback = [&NoData](int ReturnCode, StringRef ErrBuf) {
-    if (opts::ArmSPE && NoData.match(ErrBuf)) {
-      errs() << "PERF2BOLT-ERROR: perf data are incompatible for Arm SPE mode "
-                "consumption. ADDR attribute is unset.\n";
-      exit(1);
-    }
+  auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
     errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
     exit(1);
   };
 
   auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
+    Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
+                 "Cannot print 'addr' field.");
     if (!NoData.match(ErrBuf))
       ErrorCallback(ReturnCode, ErrBuf);
   };
@@ -524,7 +512,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   prepareToParse("events", MainEventsPPI, ErrorCallback);
 
   if ((!opts::BasicAggregation && parseBranchEvents()) ||
-      (opts::BasicAggregation && opts::ArmSPE && parseSpeAsBasicEvents()) ||
       (opts::BasicAggregation && parseBasicEvents()))
     errs() << "PERF2BOLT: failed to parse samples\n";
 
@@ -1163,68 +1150,6 @@ ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
   return PerfBasicSample{Event.get(), Address};
 }
 
-ErrorOr<
-    std::pair<DataAggregator::PerfBasicSample, DataAggregator::PerfBasicSample>>
-DataAggregator::parseSpeAsBasicSamples() {
-  while (checkAndConsumeFS()) {
-  }
-
-  ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
-  if (std::error_code EC = PIDRes.getError())
-    return EC;
-
-  constexpr PerfBasicSample EmptySample = PerfBasicSample{StringRef(), 0};
-  auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
-  if (MMapInfoIter == BinaryMMapInfo.end()) {
-    consumeRestOfLine();
-    return std::make_pair(EmptySample, EmptySample);
-  }
-
-  while (checkAndConsumeFS()) {
-  }
-
-  ErrorOr<StringRef> Event = parseString(FieldSeparator);
-  if (std::error_code EC = Event.getError())
-    return EC;
-
-  while (checkAndConsumeFS()) {
-  }
-
-  ErrorOr<uint64_t> AddrResTo = parseHexField(FieldSeparator);
-  if (std::error_code EC = AddrResTo.getError())
-    return EC;
-
-  consumeAllRemainingFS();
-
-  ErrorOr<uint64_t> AddrResFrom = parseHexField(FieldSeparator, true);
-  if (std::error_code EC = AddrResFrom.getError())
-    return EC;
-
-  if (!checkAndConsumeNewLine()) {
-    reportError("expected end of line");
-    return make_error_code(llvm::errc::io_error);
-  }
-
-  auto genBasicSample = [&](uint64_t Address) {
-    // When fed with non SPE branch events the target address will be null.
-    // This is expected and ignored.
-    if (Address == 0x0)
-      return EmptySample;
-
-    if (!BC->HasFixedLoadAddress)
-      adjustAddress(Address, MMapInfoIter->second);
-
-    return PerfBasicSample{Event.get(), Address};
-  };
-
-  // Show more meaningful event names on boltdata.
-  if (Event->str() == "instructions:")
-    Event = *AddrResTo != 0x0 ? "branches-spe:" : "instructions-spe:";
-
-  return std::make_pair(genBasicSample(*AddrResFrom),
-                        genBasicSample(*AddrResTo));
-}
-
 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
   PerfMemSample Res{0, 0};
 
@@ -1734,46 +1659,6 @@ std::error_code DataAggregator::parseBasicEvents() {
   return std::error_code();
 }
 
-std::error_code DataAggregator::parseSpeAsBasicEvents() {
-  outs() << "PERF2BOLT: parsing SPE data as basic events (no LBR)...\n";
-  NamedRegionTimer T("parseSPEBasic", "Parsing SPE as basic events",
-                     TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
-  uint64_t NumSpeBranchSamples = 0;
-
-  // Convert entries to one or two basic samples, depending on whether there is
-  // branch target information.
-  while (hasData()) {
-    auto SamplePair = parseSpeAsBasicSamples();
-    if (std::error_code EC = SamplePair.getError())
-      return EC;
-
-    auto registerSample = [this](const PerfBasicSample *Sample) {
-      if (!Sample->PC)
-        return;
-
-      if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
-        BF->setHasProfileAvailable();
-
-      ++BasicSamples[Sample->PC];
-      EventNames.insert(Sample->EventName);
-    };
-
-    if (SamplePair->first.PC != 0x0 && SamplePair->second.PC != 0x0)
-      ++NumSpeBranchSamples;
-
-    registerSample(&SamplePair->first);
-    registerSample(&SamplePair->second);
-  }
-
-  if (NumSpeBranchSamples == 0)
-    errs() << "PERF2BOLT-WARNING: no SPE branches found\n";
-  else
-    outs() << "PERF2BOLT: found " << NumSpeBranchSamples
-           << " SPE branch sample pairs.\n";
-
-  return std::error_code();
-}
-
 void DataAggregator::processBasicEvents() {
   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
index 95b8e205331a1..11cb4b5b762d2 100644
--- a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -4,10 +4,7 @@ REQUIRES: system-linux,perf,target=aarch64{{.*}}
 
 RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe 2> /dev/null
 
-RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe --nl %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
-
 RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-LBR
 
-CHECK-SPE-NO-LBR: PERF2BOLT: spawning perf job to read SPE branch events (non-lbr)
 CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE brstack events
 
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 9209f75147781..205cc742b6268 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -73,25 +73,6 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   std::unique_ptr<ObjectFile> ObjFile;
   std::unique_ptr<BinaryContext> BC;
 
-  /// Return true when the expected \p SampleSize profile data are generated and
-  /// contain all the \p ExpectedEventNames.
-  bool checkEvents(uint64_t PID, size_t SampleSize,
-                   const StringSet<> &ExpectedEventNames) {
-    DataAggregator DA("<pseudo input>");
-    DA.ParsingBuf = opts::ReadPerfEvents;
-    DA.BC = BC.get();
-    DataAggregator::MMapInfo MMap;
-    DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
-
-    DA.parseSpeAsBasicEvents();
-
-    for (auto &EE : ExpectedEventNames)
-      if (!DA.EventNames.contains(EE.first()))
-        return false;
-
-    return SampleSize == DA.BasicSamples.size();
-  }
-
   /// Compare LBREntries
   bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
     return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
@@ -135,26 +116,6 @@ struct PerfSpeEventsTestHelper : public testing::Test {
 } // namespace bolt
 } // namespace llvm
 
-// Check that DataAggregator can parseSpeAsBasicEvents for branch events when
-// combined with other event types.
-
-TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
-  // Check perf input with SPE branch events.
-  // Example collection command:
-  // ```
-  // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
-  // ```
-
-  opts::ReadPerfEvents =
-      "1234          instructions:              a002    a001\n"
-      "1234          instructions:              b002    b001\n"
-      "1234          instructions:              c002    c001\n"
-      "1234          instructions:              d002    d001\n"
-      "1234          instructions:              e002    e001\n";
-
-  EXPECT_TRUE(checkEvents(1234, 10, {"branches-spe:"}));
-}
-
 TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
   // Check perf input with SPE branch events as brstack format.
   // Example collection command:
@@ -182,61 +143,4 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
   parseAndCheckBrstackEvents(1234, ExpectedSamples);
 }
 
-TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
-  // Check perf input with SPE branch events and cycles.
-  // Example collection command:
-  // ```
-  // perf record -e cycles:u -e 'arm_spe_0/branch_filter=1/u' -- BINARY
-  // ```
-
-  opts::ReadPerfEvents =
-      "1234          instructions:              a002    a001\n"
-      "1234              cycles:u:                 0    b001\n"
-      "1234              cycles:u:                 0    c001\n"
-      "1234          instructions:              d002    d001\n"
-      "1234          instructions:              e002    e001\n";
-
-  EXPECT_TRUE(checkEvents(1234, 8, {"branches-spe:", "cycles:u:"}));
-}
-
-TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
-  // Check perf input with any SPE event type and cycles.
-  // Example collection command:
-  // ```
-  // perf record -e cycles:u -e 'arm_spe_0//u' -- BINARY
-  // ```
-
-  opts::ReadPerfEvents =
-      "1234              cycles:u:                0     a001\n"
-      "1234              cycles:u:                0     b001\n"
-      "1234          instructions:                0     c001\n"
-      "1234          instructions:                0     d001\n"
-      "1234          instructions:              e002    e001\n";
-
-  EXPECT_TRUE(checkEvents(1234, 6,
-                          {"cycles:u:", "instructions-spe:", "branches-spe:"}));
-}
-
-TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
-  // Check perf input that has no SPE branch pairs recorded.
-  // Example collection command:
-  // ```
-  // perf record -e cycles:u -e 'arm_spe_0/load_filter=1,branch_filter=0/u' --
-  // BINARY
-  // ```
-
-  testing::internal::CaptureStderr();
-  opts::ReadPerfEvents =
-      "1234          instructions:                 0    a001\n"
-      "1234              cycles:u:                 0    b001\n"
-      "1234          instructions:                 0    c001\n"
-      "1234              cycles:u:                 0    d001\n"
-      "1234          instructions:                 0    e001\n";
-
-  EXPECT_TRUE(checkEvents(1234, 5, {"instructions-spe:", "cycles:u:"}));
-
-  std::string Stderr = testing::internal::GetCapturedStderr();
-  EXPECT_EQ(Stderr, "PERF2BOLT-WARNING: no SPE branches found\n");
-}
-
 #endif

>From b703629856db6473fd8650f367472e17e153ca9e Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Thu, 5 Jun 2025 15:06:28 +0200
Subject: [PATCH 09/10] Address reviewers 2

---
 bolt/lib/Profile/DataAggregator.cpp           | 21 ++++++++++++-------
 .../test/perf2bolt/AArch64/perf2bolt-spe.test |  2 ++
 bolt/test/perf2bolt/X86/perf2bolt-spe.test    |  2 +-
 bolt/unittests/Profile/PerfSpeEvents.cpp      | 15 ++++++-------
 4 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 0e6ec7228ed06..e9361d7d3561c 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -49,9 +49,7 @@ static cl::opt<bool>
                      cl::desc("aggregate basic samples (without LBR info)"),
                      cl::cat(AggregatorCategory));
 
-cl::opt<bool> ArmSPE("spe",
-                     cl::desc("Enable Arm SPE mode. Can combine with `--nl` "
-                              "to use in no-lbr mode"),
+cl::opt<bool> ArmSPE("spe", cl::desc("Enable Arm SPE mode."),
                      cl::cat(AggregatorCategory));
 
 static cl::opt<std::string>
@@ -181,7 +179,10 @@ void DataAggregator::start() {
   findPerfExecutable();
 
   if (opts::ArmSPE) {
-    // pid    from_ip      to_ip        predicted/missed not-taken?
+    // pid    from_ip      to_ip        flags
+    // where flags could be:
+    // P/M: whether branch was Predicted or Mispredicted.
+    // N: optionally appears when the branch was Not-Taken (ie fall-through)
     // 12345  0x123/0x456/PN/-/-/8/RET/-
     launchPerfProcess("SPE brstack events", MainEventsPPI,
                       "script -F pid,brstack --itrace=bl",
@@ -1012,7 +1013,8 @@ ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() {
   if (std::error_code EC = MispredStrRes.getError())
     return EC;
   StringRef MispredStr = MispredStrRes.get();
-  // SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
+  // SPE brstack mispredicted flags might be up to two characters long:
+  // 'PN' or 'MN'. Where 'N' optionally appears.
   bool ValidStrSize = opts::ArmSPE
                           ? MispredStr.size() >= 1 && MispredStr.size() <= 2
                           : MispredStr.size() == 1;
@@ -1542,7 +1544,7 @@ void DataAggregator::printBranchStacksDiagnostics(
 std::error_code DataAggregator::parseBranchEvents() {
   std::string BranchEventTypeStr =
       !opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
-  outs() << "PERF2BOLT: " << BranchEventTypeStr << "...\n";
+  outs() << "PERF2BOLT: parse " << BranchEventTypeStr << "...\n";
   NamedRegionTimer T("parseBranch", "Parsing " + BranchEventTypeStr,
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
@@ -1599,8 +1601,11 @@ std::error_code DataAggregator::parseBranchEvents() {
       else
         errs()
             << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
-               "SPE brstack entries. Record profile with:"
-               "perf record arm_spe_0/branch_filter=1/";
+               "SPE brstack entries. The minimum required version of "
+               "Linux-perf is v6.14 or higher for brstack support. "
+               "With an older Linux-perf you may get zero samples. "
+               "Plese also make sure about you recorded profile with: "
+               "perf record -e 'arm_spe_0/branch_filter=1/'.";
     } else {
       printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
     }
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
index 11cb4b5b762d2..2ee62976da4d9 100644
--- a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -2,6 +2,8 @@
 
 REQUIRES: system-linux,perf,target=aarch64{{.*}}
 
+RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
+
 RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe 2> /dev/null
 
 RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-LBR
diff --git a/bolt/test/perf2bolt/X86/perf2bolt-spe.test b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
index ec24c44c4d13d..8eed2c8595098 100644
--- a/bolt/test/perf2bolt/X86/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
@@ -4,6 +4,6 @@ REQUIRES: system-linux,x86_64-linux
 
 RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
 RUN: touch %t.empty.perf.data
-RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s
+RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --spe --pa %t.exe 2>&1 | FileCheck %s
 
 CHECK: perf2bolt: -spe is available only on AArch64.
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 205cc742b6268..b8ff0a1d972a3 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -73,13 +73,13 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   std::unique_ptr<ObjectFile> ObjFile;
   std::unique_ptr<BinaryContext> BC;
 
-  /// Compare LBREntries
+  // @return true if LBREntries are equal.
   bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
     return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
            Lhs.Mispred == Rhs.Mispred;
   }
 
-  /// Parse and check SPE brstack as LBR
+  // Parse and check SPE brstack as LBR.
   void parseAndCheckBrstackEvents(
       uint64_t PID,
       const std::vector<SmallVector<LBREntry, 2>> &ExpectedSamples) {
@@ -102,12 +102,9 @@ struct PerfSpeEventsTestHelper : public testing::Test {
       EXPECT_EQ(Sample.LBR.size(), ExpectedSamples[NumSamples].size());
 
       // Check the parsed LBREntries.
-      const auto *ActualIter = Sample.LBR.begin();
-      const auto *ExpectIter = ExpectedSamples[NumSamples].begin();
-      while (ActualIter != Sample.LBR.end() &&
-             ExpectIter != ExpectedSamples[NumSamples].end())
-        EXPECT_TRUE(checkLBREntry(*ActualIter++, *ExpectIter++));
-
+      for (auto [Actual, Expected] :
+           zip_equal(Sample.LBR, ExpectedSamples[NumSamples]))
+        EXPECT_TRUE(checkLBREntry(Actual, Expected));
       ++NumSamples;
     }
   }
@@ -135,7 +132,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
                          "  1234  0xe001/0xe002/P/-/-/14/RET/-\n"
                          "  1234  0xf001/0xf002/MN/-/-/8/COND/-\n";
 
-  std::vector<SmallVector<LBREntry, 2>> ExpectedSamples = {
+  std::vector<SmallVector<LBREntry>> ExpectedSamples = {
       {{{0xa001, 0xa002, false}}}, {{{0xb001, 0xb002, false}}},
       {{{0xc001, 0xc002, false}}}, {{{0xd001, 0xd002, true}}},
       {{{0xe001, 0xe002, false}}}, {{{0xf001, 0xf002, true}}},

>From 29a901ee152f6af46bc0e0c57d434218d80396e0 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Tue, 10 Jun 2025 17:40:09 +0200
Subject: [PATCH 10/10] Simplifies SpeBranchesWithBrstack testcase

The test can be simplified now that PR #143288 has removed the
validation phase from parseLBRSample, so the BranchLBRs container can
be used for testing. Previously, when BOLT was supplied with mock
addresses, the BranchLBRs container was left empty by that validation
phase.
---
 bolt/unittests/Profile/PerfSpeEvents.cpp | 44 ++++++------------------
 1 file changed, 10 insertions(+), 34 deletions(-)

diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index b8ff0a1d972a3..674af93578486 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -39,7 +39,7 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   }
 
 protected:
-  using LBREntry = DataAggregator::LBREntry;
+  using Trace = DataAggregator::Trace;
 
   void initalizeLLVM() {
     llvm::InitializeAllTargetInfos();
@@ -73,40 +73,20 @@ struct PerfSpeEventsTestHelper : public testing::Test {
   std::unique_ptr<ObjectFile> ObjFile;
   std::unique_ptr<BinaryContext> BC;
 
-  // @return true if LBREntries are equal.
-  bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
-    return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
-           Lhs.Mispred == Rhs.Mispred;
-  }
-
   // Parse and check SPE brstack as LBR.
-  void parseAndCheckBrstackEvents(
-      uint64_t PID,
-      const std::vector<SmallVector<LBREntry, 2>> &ExpectedSamples) {
-    int NumSamples = 0;
-
+  void parseAndCheckBrstackEvents(uint64_t PID, uint64_t From, uint64_t To,
+                                  uint64_t Count, size_t SampleSize) {
     DataAggregator DA("<pseudo input>");
     DA.ParsingBuf = opts::ReadPerfEvents;
     DA.BC = BC.get();
     DataAggregator::MMapInfo MMap;
     DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
 
-    // Process buffer.
-    while (DA.hasData()) {
-      ErrorOr<DataAggregator::PerfBranchSample> SampleRes =
-          DA.parseBranchSample();
-      if (std::error_code EC = SampleRes.getError())
-        EXPECT_NE(EC, std::errc::no_such_process);
-
-      DataAggregator::PerfBranchSample &Sample = SampleRes.get();
-      EXPECT_EQ(Sample.LBR.size(), ExpectedSamples[NumSamples].size());
-
-      // Check the parsed LBREntries.
-      for (auto [Actual, Expected] :
-           zip_equal(Sample.LBR, ExpectedSamples[NumSamples]))
-        EXPECT_TRUE(checkLBREntry(Actual, Expected));
-      ++NumSamples;
-    }
+    DA.parseBranchEvents();
+
+    EXPECT_EQ(DA.BranchLBRs.size(), SampleSize);
+    EXPECT_EQ(DA.BranchLBRs[Trace(From, To)].MispredCount, Count);
+    EXPECT_EQ(DA.BranchLBRs[Trace(From, To)].TakenCount, Count);
   }
 };
 
@@ -130,14 +110,10 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
                          "  1234  0xc001/0xc002/P/-/-/13/-/-\n"
                          "  1234  0xd001/0xd002/M/-/-/7/RET/-\n"
                          "  1234  0xe001/0xe002/P/-/-/14/RET/-\n"
+                         "  1234  0xd001/0xd002/M/-/-/7/RET/-\n"
                          "  1234  0xf001/0xf002/MN/-/-/8/COND/-\n";
 
-  std::vector<SmallVector<LBREntry>> ExpectedSamples = {
-      {{{0xa001, 0xa002, false}}}, {{{0xb001, 0xb002, false}}},
-      {{{0xc001, 0xc002, false}}}, {{{0xd001, 0xd002, true}}},
-      {{{0xe001, 0xe002, false}}}, {{{0xf001, 0xf002, true}}},
-  };
-  parseAndCheckBrstackEvents(1234, ExpectedSamples);
+  parseAndCheckBrstackEvents(1234, 0xd001, 0xd002, 2, 6);
 }
 
 #endif



More information about the llvm-commits mailing list