[llvm] Add initial support for SPE brstack format (PR #129231)
Ádám Kallai via llvm-commits
llvm-commits at lists.llvm.org
Thu May 29 05:50:43 PDT 2025
https://github.com/kaadam updated https://github.com/llvm/llvm-project/pull/129231
>From a05768a6cebb702384087a020f9cb145e5a2025d Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <paschalis.mpeis at arm.com>
Date: Fri, 20 Dec 2024 14:19:01 +0000
Subject: [PATCH 1/9] [BOLT][AArch64] Introduce SPE mode in BasicAggregation
BOLT gains the ability to process branch target information generated by
Arm SPE data, using the `BasicAggregation` format.
Example usage is:
```bash
perf2bolt -p perf.data -o perf.boltdata --nl --spe BINARY
```
New branch data and compatibility:
---
SPE branch entries in perf data contain a branch pair (`IP` -> `ADDR`)
for the source and destination branches. DataAggregator processes those
by creating two basic samples. Any other event types will have `ADDR`
field set to `0x0`. For those a single sample will be created. Such
events can be either SPE or non-SPE, like `l1d-access` and `cycles`
respectively.
The format of the input perf entries is:
```
PID EVENT-TYPE ADDR IP
```
When in SPE mode and:
- host is not `AArch64`, BOLT will exit with a relevant message
- `ADDR` field is unavailable, BOLT will exit with a relevant message
- no branch pairs were recorded, BOLT will present a warning
Examples of generating profiling data for the SPE mode:
---
Profiles can be captured with perf on AArch64 machines with SPE enabled.
They can be combined with other events, SPE or not.
Capture only SPE branch data events:
```bash
perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
```
Capture any SPE events:
```bash
perf record -e 'arm_spe_0//u' -- BINARY
```
Capture any SPE events and cycles:
```bash
perf record -e 'arm_spe_0//u' -e cycles:u -- BINARY
```
Use more filters, enable jitter, and specify a count to control overhead and quality:
```bash
perf record -e 'arm_spe_0/branch_filter=1,load_filter=0,store_filter=0,jitter=1/u' -c 10007 -- BINARY
```
---
bolt/include/bolt/Profile/DataAggregator.h | 14 ++
bolt/lib/Profile/DataAggregator.cpp | 138 +++++++++++++-
.../test/perf2bolt/AArch64/perf2bolt-spe.test | 14 ++
bolt/test/perf2bolt/X86/perf2bolt-spe.test | 9 +
bolt/tools/driver/llvm-bolt.cpp | 9 +
bolt/unittests/Profile/CMakeLists.txt | 14 ++
bolt/unittests/Profile/PerfSpeEvents.cpp | 173 ++++++++++++++++++
7 files changed, 363 insertions(+), 8 deletions(-)
create mode 100644 bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
create mode 100644 bolt/test/perf2bolt/X86/perf2bolt-spe.test
create mode 100644 bolt/unittests/Profile/PerfSpeEvents.cpp
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6d918134137d5..d499bae62ad06 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -78,6 +78,8 @@ class DataAggregator : public DataReader {
static bool checkPerfDataMagic(StringRef FileName);
private:
+ friend struct PerfSpeEventsTestHelper;
+
struct PerfBranchSample {
SmallVector<LBREntry, 32> LBR;
};
@@ -279,6 +281,15 @@ class DataAggregator : public DataReader {
/// and a PC
ErrorOr<PerfBasicSample> parseBasicSample();
+ /// Parse an Arm SPE entry into the non-lbr format by generating two basic
+ /// samples. The format of an input SPE entry is:
+ /// ```
+ /// PID EVENT-TYPE ADDR IP
+ /// ```
+ /// SPE branch events will have 'ADDR' set to a branch target address while
+ /// other perf or SPE events will have it set to zero.
+ ErrorOr<std::pair<PerfBasicSample,PerfBasicSample>> parseSpeAsBasicSamples();
+
/// Parse a single perf sample containing a PID associated with an IP and
/// address.
ErrorOr<PerfMemSample> parseMemSample();
@@ -325,6 +336,9 @@ class DataAggregator : public DataReader {
/// Process non-LBR events.
void processBasicEvents();
+ /// Parse Arm SPE events into the non-LBR format.
+ std::error_code parseSpeAsBasicEvents();
+
/// Parse the full output generated by perf script to report memory events.
std::error_code parseMemEvents();
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 6beb60741406e..cea7576d82190 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -49,6 +49,13 @@ static cl::opt<bool>
cl::desc("aggregate basic samples (without LBR info)"),
cl::cat(AggregatorCategory));
+cl::opt<bool> ArmSPE(
+ "spe",
+ cl::desc(
+ "Enable Arm SPE mode. Used in conjuction with no-lbr mode, ie `--spe "
+ "--nl`"),
+ cl::cat(AggregatorCategory));
+
static cl::opt<std::string>
ITraceAggregation("itrace",
cl::desc("Generate LBR info with perf itrace argument"),
@@ -175,11 +182,19 @@ void DataAggregator::start() {
findPerfExecutable();
- if (opts::BasicAggregation) {
- launchPerfProcess("events without LBR",
- MainEventsPPI,
+ if (opts::ArmSPE) {
+ if (!opts::BasicAggregation) {
+ errs() << "PERF2BOLT-ERROR: Arm SPE mode is combined only with "
+ "BasicAggregation.\n";
+ exit(1);
+ }
+ launchPerfProcess("branch events with SPE", MainEventsPPI,
+ "script -F pid,event,ip,addr --itrace=i1i",
+ /*Wait = */ false);
+ } else if (opts::BasicAggregation) {
+ launchPerfProcess("events without LBR", MainEventsPPI,
"script -F pid,event,ip",
- /*Wait = */false);
+ /*Wait = */ false);
} else if (!opts::ITraceAggregation.empty()) {
std::string ItracePerfScriptArgs = llvm::formatv(
"script -F pid,brstack --itrace={0}", opts::ITraceAggregation);
@@ -448,14 +463,20 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;
- auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
+ const Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
+ "Cannot print 'addr' field.");
+
+ auto ErrorCallback = [&NoData](int ReturnCode, StringRef ErrBuf) {
+ if (opts::ArmSPE && NoData.match(ErrBuf)) {
+ errs() << "PERF2BOLT-ERROR: perf data are incompatible for Arm SPE mode "
+ "consumption. ADDR attribute is unset.\n";
+ exit(1);
+ }
errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
exit(1);
};
auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
- Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
- "Cannot print 'addr' field.");
if (!NoData.match(ErrBuf))
ErrorCallback(ReturnCode, ErrBuf);
};
@@ -501,7 +522,8 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
filterBinaryMMapInfo();
prepareToParse("events", MainEventsPPI, ErrorCallback);
- if ((!opts::BasicAggregation && parseBranchEvents()) ||
+ if (((!opts::BasicAggregation && !opts::ArmSPE) && parseBranchEvents()) ||
+ (opts::BasicAggregation && opts::ArmSPE && parseSpeAsBasicEvents()) ||
(opts::BasicAggregation && parseBasicEvents()))
errs() << "PERF2BOLT: failed to parse samples\n";
@@ -1124,6 +1146,66 @@ ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
return PerfBasicSample{Event.get(), Address};
}
+ErrorOr<
+ std::pair<DataAggregator::PerfBasicSample, DataAggregator::PerfBasicSample>>
+DataAggregator::parseSpeAsBasicSamples() {
+ while (checkAndConsumeFS()) {
+ }
+
+ ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
+ if (std::error_code EC = PIDRes.getError())
+ return EC;
+
+ constexpr PerfBasicSample EmptySample = PerfBasicSample{StringRef(), 0};
+ auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
+ if (MMapInfoIter == BinaryMMapInfo.end()) {
+ consumeRestOfLine();
+ return std::make_pair(EmptySample, EmptySample);
+ }
+
+ while (checkAndConsumeFS()) {
+ }
+
+ ErrorOr<StringRef> Event = parseString(FieldSeparator);
+ if (std::error_code EC = Event.getError())
+ return EC;
+
+ while (checkAndConsumeFS()) {
+ }
+
+ ErrorOr<uint64_t> AddrResTo = parseHexField(FieldSeparator);
+ if (std::error_code EC = AddrResTo.getError())
+ return EC;
+ consumeAllRemainingFS();
+
+ ErrorOr<uint64_t> AddrResFrom = parseHexField(FieldSeparator, true);
+ if (std::error_code EC = AddrResFrom.getError())
+ return EC;
+
+ if (!checkAndConsumeNewLine()) {
+ reportError("expected end of line");
+ return make_error_code(llvm::errc::io_error);
+ }
+
+ auto genBasicSample = [&](uint64_t Address) {
+ // When fed with non SPE branch events the target address will be null.
+ // This is expected and ignored.
+ if (Address == 0x0)
+ return EmptySample;
+
+ if (!BC->HasFixedLoadAddress)
+ adjustAddress(Address, MMapInfoIter->second);
+ return PerfBasicSample{Event.get(), Address};
+ };
+
+ // Show more meaningful event names on boltdata.
+ if (Event->str() == "instructions:")
+ Event = *AddrResTo != 0x0 ? "branch-spe:" : "instruction-spe:";
+
+ return std::make_pair(genBasicSample(*AddrResFrom),
+ genBasicSample(*AddrResTo));
+}
+
ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
PerfMemSample Res{0, 0};
@@ -1622,6 +1704,46 @@ std::error_code DataAggregator::parseBasicEvents() {
return std::error_code();
}
+std::error_code DataAggregator::parseSpeAsBasicEvents() {
+ outs() << "PERF2BOLT: parsing SPE data as basic events (no LBR)...\n";
+ NamedRegionTimer T("parseSPEBasic", "Parsing SPE as basic events",
+ TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
+ uint64_t NumSpeBranchSamples = 0;
+
+ // Convert entries to one or two basic samples, depending on whether there is
+ // branch target information.
+ while (hasData()) {
+ auto SamplePair = parseSpeAsBasicSamples();
+ if (std::error_code EC = SamplePair.getError())
+ return EC;
+
+ auto registerSample = [this](const PerfBasicSample *Sample) {
+ if (!Sample->PC)
+ return;
+
+ if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
+ BF->setHasProfileAvailable();
+
+ ++BasicSamples[Sample->PC];
+ EventNames.insert(Sample->EventName);
+ };
+
+ if (SamplePair->first.PC != 0x0 && SamplePair->second.PC != 0x0)
+ ++NumSpeBranchSamples;
+
+ registerSample(&SamplePair->first);
+ registerSample(&SamplePair->second);
+ }
+
+ if (NumSpeBranchSamples == 0)
+ errs() << "PERF2BOLT-WARNING: no SPE branches found\n";
+ else
+ outs() << "PERF2BOLT: found " << NumSpeBranchSamples
+ << " SPE branch sample pairs.\n";
+
+ return std::error_code();
+}
+
void DataAggregator::processBasicEvents() {
outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
new file mode 100644
index 0000000000000..d7cea7ff769b8
--- /dev/null
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -0,0 +1,14 @@
+## Check that Arm SPE mode is available on AArch64 with BasicAggregation.
+
+REQUIRES: system-linux,perf,target=aarch64{{.*}}
+
+RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
+RUN: touch %t.empty.perf.data
+RUN: perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
+
+CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job
+
+RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe
+RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-LBR
+
+CHECK-SPE-LBR: PERF2BOLT-ERROR: Arm SPE mode is combined only with BasicAggregation.
diff --git a/bolt/test/perf2bolt/X86/perf2bolt-spe.test b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
new file mode 100644
index 0000000000000..f31c17f411137
--- /dev/null
+++ b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
@@ -0,0 +1,9 @@
+## Check that Arm SPE mode is unavailable on X86.
+
+REQUIRES: system-linux,x86_64-linux
+
+RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
+RUN: touch %t.empty.perf.data
+RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s
+
+CHECK: BOLT-ERROR: -spe is available only on AArch64.
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index b9836c2397b6b..66ccc8d0b65f4 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -51,6 +51,8 @@ static cl::opt<std::string> InputFilename(cl::Positional,
cl::Required, cl::cat(BoltCategory),
cl::sub(cl::SubCommand::getAll()));
+extern cl::opt<bool> ArmSPE;
+
static cl::opt<std::string>
InputDataFilename("data",
cl::desc("<data file>"),
@@ -237,6 +239,13 @@ int main(int argc, char **argv) {
if (Error E = RIOrErr.takeError())
report_error(opts::InputFilename, std::move(E));
RewriteInstance &RI = *RIOrErr.get();
+
+ if (opts::AggregateOnly && !RI.getBinaryContext().isAArch64() &&
+ opts::ArmSPE == 1) {
+ errs() << "BOLT-ERROR: -spe is available only on AArch64.\n";
+ exit(1);
+ }
+
if (!opts::PerfData.empty()) {
if (!opts::AggregateOnly) {
errs() << ToolName
diff --git a/bolt/unittests/Profile/CMakeLists.txt b/bolt/unittests/Profile/CMakeLists.txt
index e0aa0926b49c0..ce01c6c4b949e 100644
--- a/bolt/unittests/Profile/CMakeLists.txt
+++ b/bolt/unittests/Profile/CMakeLists.txt
@@ -1,11 +1,25 @@
+set(LLVM_LINK_COMPONENTS
+ DebugInfoDWARF
+ Object
+ ${LLVM_TARGETS_TO_BUILD}
+ )
+
add_bolt_unittest(ProfileTests
DataAggregator.cpp
+ PerfSpeEvents.cpp
DISABLE_LLVM_LINK_LLVM_DYLIB
)
target_link_libraries(ProfileTests
PRIVATE
+ LLVMBOLTCore
LLVMBOLTProfile
+ LLVMTargetParser
+ LLVMTestingSupport
)
+foreach (tgt ${BOLT_TARGETS_TO_BUILD})
+ string(TOUPPER "${tgt}" upper)
+ target_compile_definitions(ProfileTests PRIVATE "${upper}_AVAILABLE")
+endforeach()
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
new file mode 100644
index 0000000000000..807a3bb1e07f4
--- /dev/null
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -0,0 +1,173 @@
+//===- bolt/unittests/Profile/PerfSpeEvents.cpp ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef AARCH64_AVAILABLE
+
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Profile/DataAggregator.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::bolt;
+using namespace llvm::object;
+using namespace llvm::ELF;
+
+namespace opts {
+extern cl::opt<std::string> ReadPerfEvents;
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+/// Perform checks on perf SPE branch events combined with other SPE or perf
+/// events.
+struct PerfSpeEventsTestHelper : public testing::Test {
+ void SetUp() override {
+ initalizeLLVM();
+ prepareElf();
+ initializeBOLT();
+ }
+
+protected:
+ void initalizeLLVM() {
+ llvm::InitializeAllTargetInfos();
+ llvm::InitializeAllTargetMCs();
+ llvm::InitializeAllAsmParsers();
+ llvm::InitializeAllDisassemblers();
+ llvm::InitializeAllTargets();
+ llvm::InitializeAllAsmPrinters();
+ }
+
+ void prepareElf() {
+ memcpy(ElfBuf, "\177ELF", 4);
+ ELF64LE::Ehdr *EHdr = reinterpret_cast<typename ELF64LE::Ehdr *>(ElfBuf);
+ EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64;
+ EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB;
+ EHdr->e_machine = llvm::ELF::EM_AARCH64;
+ MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF");
+ ObjFile = cantFail(ObjectFile::createObjectFile(Source));
+ }
+
+ void initializeBOLT() {
+ Relocation::Arch = ObjFile->makeTriple().getArch();
+ BC = cantFail(BinaryContext::createBinaryContext(
+ ObjFile->makeTriple(), std::make_shared<orc::SymbolStringPool>(),
+ ObjFile->getFileName(), nullptr, /*IsPIC*/ false,
+ DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()}));
+ ASSERT_FALSE(!BC);
+ }
+
+ char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {};
+ std::unique_ptr<ObjectFile> ObjFile;
+ std::unique_ptr<BinaryContext> BC;
+
+ /// Return true when the expected \p SampleSize profile data are generated and
+ /// contain all the \p ExpectedEventNames.
+ bool checkEvents(uint64_t PID, size_t SampleSize,
+ const StringSet<> &ExpectedEventNames) {
+ DataAggregator DA("<pseudo input>");
+ DA.ParsingBuf = opts::ReadPerfEvents;
+ DA.BC = BC.get();
+ DataAggregator::MMapInfo MMap;
+ DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
+
+ DA.parseSpeAsBasicEvents();
+
+ for (auto &EE : ExpectedEventNames)
+ if (!DA.EventNames.contains(EE.first()))
+ return false;
+
+ return SampleSize == DA.BasicSamples.size();
+ }
+};
+
+} // namespace bolt
+} // namespace llvm
+
+// Check that DataAggregator can parseSpeAsBasicEvents for branch events when
+// combined with other event types.
+
+TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
+ // Check perf input with SPE branch events.
+ // Example collection command:
+ // ```
+ // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+ // ```
+
+ opts::ReadPerfEvents =
+ "1234 instructions: a002 a001\n"
+ "1234 instructions: b002 b001\n"
+ "1234 instructions: c002 c001\n"
+ "1234 instructions: d002 d001\n"
+ "1234 instructions: e002 e001\n";
+
+ EXPECT_TRUE(checkEvents(1234, 10, {"branch-spe:"}));
+}
+
+TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
+ // Check perf input with SPE branch events and cycles.
+ // Example collection command:
+ // ```
+ // perf record -e cycles:u -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+ // ```
+
+ opts::ReadPerfEvents =
+ "1234 instructions: a002 a001\n"
+ "1234 cycles:u: 0 b001\n"
+ "1234 cycles:u: 0 c001\n"
+ "1234 instructions: d002 d001\n"
+ "1234 instructions: e002 e001\n";
+
+ EXPECT_TRUE(checkEvents(1234, 8, {"branch-spe:", "cycles:u:"}));
+}
+
+TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
+ // Check perf input with any SPE event type and cycles.
+ // Example collection command:
+ // ```
+ // perf record -e cycles:u -e 'arm_spe_0//u' -- BINARY
+ // ```
+
+ opts::ReadPerfEvents =
+ "1234 cycles:u: 0 a001\n"
+ "1234 cycles:u: 0 b001\n"
+ "1234 instructions: 0 c001\n"
+ "1234 instructions: 0 d001\n"
+ "1234 instructions: e002 e001\n";
+
+ EXPECT_TRUE(
+ checkEvents(1234, 6, {"cycles:u:", "instruction-spe:", "branch-spe:"}));
+}
+
+TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
+ // Check perf input that has no SPE branch pairs recorded.
+ // Example collection command:
+ // ```
+ // perf record -e cycles:u -e 'arm_spe_0/load_filter=1,branch_filter=0/u' --
+ // BINARY
+ // ```
+
+ testing::internal::CaptureStderr();
+ opts::ReadPerfEvents =
+ "1234 instructions: 0 a001\n"
+ "1234 cycles:u: 0 b001\n"
+ "1234 instructions: 0 c001\n"
+ "1234 cycles:u: 0 d001\n"
+ "1234 instructions: 0 e001\n";
+
+ EXPECT_TRUE(checkEvents(1234, 5, {"instruction-spe:", "cycles:u:"}));
+
+ std::string Stderr = testing::internal::GetCapturedStderr();
+ EXPECT_EQ(Stderr, "PERF2BOLT-WARNING: no SPE branches found\n");
+}
+
+#endif
>From 6c656cc7bf2f5c381de680fcf6ccf816cf73fcc5 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Fri, 20 Dec 2024 15:13:40 +0000
Subject: [PATCH 2/9] clang-format fix
---
bolt/include/bolt/Profile/DataAggregator.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index d499bae62ad06..499b44b0fc857 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -288,7 +288,7 @@ class DataAggregator : public DataReader {
/// ```
/// SPE branch events will have 'ADDR' set to a branch target address while
/// other perf or SPE events will have it set to zero.
- ErrorOr<std::pair<PerfBasicSample,PerfBasicSample>> parseSpeAsBasicSamples();
+ ErrorOr<std::pair<PerfBasicSample, PerfBasicSample>> parseSpeAsBasicSamples();
/// Parse a single perf sample containing a PID associated with an IP and
/// address.
>From c31c28cb68bc71415ddd22a5bdbd2d989ffc134a Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Wed, 15 Jan 2025 15:11:12 +0000
Subject: [PATCH 3/9] Addressing reviewers (1)
---
bolt/include/bolt/Utils/CommandLineOpts.h | 1 +
bolt/lib/Profile/DataAggregator.cpp | 4 +++-
bolt/tools/driver/llvm-bolt.cpp | 4 +---
bolt/unittests/Profile/PerfSpeEvents.cpp | 10 +++++-----
4 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index ef4081769d2ad..9fd84ced351b4 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -39,6 +39,7 @@ extern llvm::cl::OptionCategory BinaryAnalysisCategory;
extern llvm::cl::opt<unsigned> AlignText;
extern llvm::cl::opt<unsigned> AlignFunctions;
extern llvm::cl::opt<bool> AggregateOnly;
+extern llvm::cl::opt<bool> ArmSPE;
extern llvm::cl::opt<unsigned> BucketsPerLine;
extern llvm::cl::opt<bool> CompactCodeModel;
extern llvm::cl::opt<bool> DiffOnly;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index cea7576d82190..9d44a0310e510 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1176,6 +1176,7 @@ DataAggregator::parseSpeAsBasicSamples() {
ErrorOr<uint64_t> AddrResTo = parseHexField(FieldSeparator);
if (std::error_code EC = AddrResTo.getError())
return EC;
+
consumeAllRemainingFS();
ErrorOr<uint64_t> AddrResFrom = parseHexField(FieldSeparator, true);
@@ -1195,12 +1196,13 @@ DataAggregator::parseSpeAsBasicSamples() {
if (!BC->HasFixedLoadAddress)
adjustAddress(Address, MMapInfoIter->second);
+
return PerfBasicSample{Event.get(), Address};
};
// Show more meaningful event names on boltdata.
if (Event->str() == "instructions:")
- Event = *AddrResTo != 0x0 ? "branch-spe:" : "instruction-spe:";
+ Event = *AddrResTo != 0x0 ? "branches-spe:" : "instructions-spe:";
return std::make_pair(genBasicSample(*AddrResFrom),
genBasicSample(*AddrResTo));
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index 66ccc8d0b65f4..2e91118c00a83 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -51,8 +51,6 @@ static cl::opt<std::string> InputFilename(cl::Positional,
cl::Required, cl::cat(BoltCategory),
cl::sub(cl::SubCommand::getAll()));
-extern cl::opt<bool> ArmSPE;
-
static cl::opt<std::string>
InputDataFilename("data",
cl::desc("<data file>"),
@@ -241,7 +239,7 @@ int main(int argc, char **argv) {
RewriteInstance &RI = *RIOrErr.get();
if (opts::AggregateOnly && !RI.getBinaryContext().isAArch64() &&
- opts::ArmSPE == 1) {
+ opts::ArmSPE) {
errs() << "BOLT-ERROR: -spe is available only on AArch64.\n";
exit(1);
}
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 807a3bb1e07f4..e52393b516fa3 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -110,7 +110,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
"1234 instructions: d002 d001\n"
"1234 instructions: e002 e001\n";
- EXPECT_TRUE(checkEvents(1234, 10, {"branch-spe:"}));
+ EXPECT_TRUE(checkEvents(1234, 10, {"branches-spe:"}));
}
TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
@@ -127,7 +127,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
"1234 instructions: d002 d001\n"
"1234 instructions: e002 e001\n";
- EXPECT_TRUE(checkEvents(1234, 8, {"branch-spe:", "cycles:u:"}));
+ EXPECT_TRUE(checkEvents(1234, 8, {"branches-spe:", "cycles:u:"}));
}
TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
@@ -144,8 +144,8 @@ TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
"1234 instructions: 0 d001\n"
"1234 instructions: e002 e001\n";
- EXPECT_TRUE(
- checkEvents(1234, 6, {"cycles:u:", "instruction-spe:", "branch-spe:"}));
+ EXPECT_TRUE(checkEvents(1234, 6,
+ {"cycles:u:", "instructions-spe:", "branches-spe:"}));
}
TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
@@ -164,7 +164,7 @@ TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
"1234 cycles:u: 0 d001\n"
"1234 instructions: 0 e001\n";
- EXPECT_TRUE(checkEvents(1234, 5, {"instruction-spe:", "cycles:u:"}));
+ EXPECT_TRUE(checkEvents(1234, 5, {"instructions-spe:", "cycles:u:"}));
std::string Stderr = testing::internal::GetCapturedStderr();
EXPECT_EQ(Stderr, "PERF2BOLT-WARNING: no SPE branches found\n");
>From a7da7a88c9eb5e0bf7fcb3b738e4a5159346600a Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Fri, 17 Jan 2025 13:42:19 +0000
Subject: [PATCH 4/9] Addressing reviewers (2)
---
bolt/test/perf2bolt/X86/perf2bolt-spe.test | 2 +-
bolt/tools/driver/llvm-bolt.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/bolt/test/perf2bolt/X86/perf2bolt-spe.test b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
index f31c17f411137..ec24c44c4d13d 100644
--- a/bolt/test/perf2bolt/X86/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/X86/perf2bolt-spe.test
@@ -6,4 +6,4 @@ RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.e
RUN: touch %t.empty.perf.data
RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s
-CHECK: BOLT-ERROR: -spe is available only on AArch64.
+CHECK: perf2bolt: -spe is available only on AArch64.
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index 2e91118c00a83..cf1b31f8c0c66 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -240,7 +240,7 @@ int main(int argc, char **argv) {
if (opts::AggregateOnly && !RI.getBinaryContext().isAArch64() &&
opts::ArmSPE) {
- errs() << "BOLT-ERROR: -spe is available only on AArch64.\n";
+ errs() << ToolName << ": -spe is available only on AArch64.\n";
exit(1);
}
>From 089cfd0cd427f10013feef835fe181aa0c0459f3 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Wed, 19 Feb 2025 17:00:47 +0100
Subject: [PATCH 5/9] Add initial support for SPE brstack
Perf will be able to report SPE branch events in a way similar to how it
reports LBR brstack.
Therefore, we can reuse the existing LBR parsing process for SPE as well.
Example of the SPE brstack input format:
```bash
perf script -i perf.data -F pid,brstack --itrace=bl
```
```
---
PID FROM TO PREDICTED
---
16984 0x72e342e5f4/0x72e36192d0/M/-/-/11/RET/-
16984 0x72e7b8b3b4/0x72e7b8b3b8/PN/-/-/11/COND/-
16984 0x72e7b92b48/0x72e7b92b4c/PN/-/-/8/COND/-
16984 0x72eacc6b7c/0x760cc94b00/P/-/-/9/RET/-
16984 0x72e3f210fc/0x72e3f21068/P/-/-/4//-
16984 0x72e39b8c5c/0x72e3627b24/P/-/-/4//-
16984 0x72e7b89d20/0x72e7b92bbc/P/-/-/4/RET/-
```
The SPE brstack mispredicted flag might be two characters long: 'PN' or 'MN',
where 'N' means the branch was marked as NOT-TAKEN. This flag only applies to
conditional instructions (conditional branch or compare-and-branch) and
indicates that the instruction failed its condition code check.
Perf with 'brstack' support for SPE is available here:
```
https://github.com/Leo-Yan/linux/tree/perf_arm_spe_branch_flags_v2
```
Example of usage with SPE perf data:
```bash
perf2bolt -p perf.data -o perf.fdata --spe BINARY
```
Capture standard SPE branch events with perf:
```bash
perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
```
A unit test is also added to check the parsing of the 'SPE brstack format'.
---
bolt/lib/Profile/DataAggregator.cpp | 59 +++++++++------
.../test/perf2bolt/AArch64/perf2bolt-spe.test | 2 +-
bolt/unittests/Profile/PerfSpeEvents.cpp | 71 +++++++++++++++++++
3 files changed, 109 insertions(+), 23 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 9d44a0310e510..3bcb1a5098b7a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -49,12 +49,10 @@ static cl::opt<bool>
cl::desc("aggregate basic samples (without LBR info)"),
cl::cat(AggregatorCategory));
-cl::opt<bool> ArmSPE(
- "spe",
- cl::desc(
- "Enable Arm SPE mode. Used in conjuction with no-lbr mode, ie `--spe "
- "--nl`"),
- cl::cat(AggregatorCategory));
+cl::opt<bool> ArmSPE("spe",
+ cl::desc("Enable Arm SPE mode. Can combine with `--nl` "
+ "to use in no-lbr mode"),
+ cl::cat(AggregatorCategory));
static cl::opt<std::string>
ITraceAggregation("itrace",
@@ -184,13 +182,16 @@ void DataAggregator::start() {
if (opts::ArmSPE) {
if (!opts::BasicAggregation) {
- errs() << "PERF2BOLT-ERROR: Arm SPE mode is combined only with "
- "BasicAggregation.\n";
- exit(1);
+ // pid from_ip to_ip predicted?
+ // 12345 0x123/0x456/P/-/-/8/RET/-
+ launchPerfProcess("SPE branch events", MainEventsPPI,
+ "script -F pid,brstack --itrace=bl",
+ /*Wait = */ false);
+ } else {
+ launchPerfProcess("SPE brstack events", MainEventsPPI,
+ "script -F pid,event,ip,addr --itrace=i1i",
+ /*Wait = */ false);
}
- launchPerfProcess("branch events with SPE", MainEventsPPI,
- "script -F pid,event,ip,addr --itrace=i1i",
- /*Wait = */ false);
} else if (opts::BasicAggregation) {
launchPerfProcess("events without LBR", MainEventsPPI,
"script -F pid,event,ip",
@@ -522,7 +523,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
filterBinaryMMapInfo();
prepareToParse("events", MainEventsPPI, ErrorCallback);
- if (((!opts::BasicAggregation && !opts::ArmSPE) && parseBranchEvents()) ||
+ if ((!opts::BasicAggregation && parseBranchEvents()) ||
(opts::BasicAggregation && opts::ArmSPE && parseSpeAsBasicEvents()) ||
(opts::BasicAggregation && parseBasicEvents()))
errs() << "PERF2BOLT: failed to parse samples\n";
@@ -1020,7 +1021,11 @@ ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
if (std::error_code EC = MispredStrRes.getError())
return EC;
StringRef MispredStr = MispredStrRes.get();
- if (MispredStr.size() != 1 ||
+ // SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
+ bool ProperStrSize = (MispredStr.size() == 2 && opts::ArmSPE)
+ ? (MispredStr[1] == 'N')
+ : (MispredStr.size() == 1);
+ if (!ProperStrSize ||
(MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
reportError("expected single char for mispred bit");
Diag << "Found: " << MispredStr << "\n";
@@ -1597,9 +1602,11 @@ void DataAggregator::printBranchStacksDiagnostics(
}
std::error_code DataAggregator::parseBranchEvents() {
- outs() << "PERF2BOLT: parse branch events...\n";
- NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
- TimerGroupDesc, opts::TimeAggregator);
+ std::string BranchEventTypeStr =
+ opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
+ outs() << "PERF2BOLT: " << BranchEventTypeStr << "...\n";
+ NamedRegionTimer T("parseBranch", "Parsing " + BranchEventTypeStr,
+ TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
uint64_t NumEntries = 0;
uint64_t NumSamples = 0;
@@ -1625,7 +1632,8 @@ std::error_code DataAggregator::parseBranchEvents() {
}
NumEntries += Sample.LBR.size();
- if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
+ if (this->BC->isX86() && BAT && Sample.LBR.size() == 32 &&
+ !NeedsSkylakeFix) {
errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
NeedsSkylakeFix = true;
}
@@ -1644,10 +1652,17 @@ std::error_code DataAggregator::parseBranchEvents() {
if (NumSamples && NumSamplesNoLBR == NumSamples) {
// Note: we don't know if perf2bolt is being used to parse memory samples
// at this point. In this case, it is OK to parse zero LBRs.
- errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
- "LBR. Record profile with perf record -j any or run perf2bolt "
- "in no-LBR mode with -nl (the performance improvement in -nl "
- "mode may be limited)\n";
+ if (!opts::ArmSPE)
+ errs()
+ << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
+ "LBR. Record profile with perf record -j any or run perf2bolt "
+ "in no-LBR mode with -nl (the performance improvement in -nl "
+ "mode may be limited)\n";
+ else
+ errs()
+ << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
+ "SPE brstack entries. Record profile with:"
+ "perf record arm_spe_0/branch_filter=1/";
} else {
printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
}
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
index d7cea7ff769b8..d34a2c7994f72 100644
--- a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -11,4 +11,4 @@ CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job
RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe
RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-LBR
-CHECK-SPE-LBR: PERF2BOLT-ERROR: Arm SPE mode is combined only with BasicAggregation.
+CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE branch events
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index e52393b516fa3..448354b784f29 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -23,6 +23,7 @@ using namespace llvm::ELF;
namespace opts {
extern cl::opt<std::string> ReadPerfEvents;
+extern cl::opt<bool> ArmSPE;
} // namespace opts
namespace llvm {
@@ -88,6 +89,45 @@ struct PerfSpeEventsTestHelper : public testing::Test {
return SampleSize == DA.BasicSamples.size();
}
+
+ /// Compare LBREntries
+ bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
+ return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
+ Lhs.Mispred == Rhs.Mispred;
+ }
+
+ /// Parse and check SPE brstack as LBR
+ void parseAndCheckBrstackEvents(
+ uint64_t PID,
+ const std::vector<SmallVector<LBREntry, 2>> &ExpectedSamples) {
+ int NumSamples = 0;
+
+ DataAggregator DA("<pseudo input>");
+ DA.ParsingBuf = opts::ReadPerfEvents;
+ DA.BC = BC.get();
+ DataAggregator::MMapInfo MMap;
+ DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
+
+ // Process buffer.
+ while (DA.hasData()) {
+ ErrorOr<DataAggregator::PerfBranchSample> SampleRes =
+ DA.parseBranchSample();
+ if (std::error_code EC = SampleRes.getError())
+ EXPECT_NE(EC, std::errc::no_such_process);
+
+ DataAggregator::PerfBranchSample &Sample = SampleRes.get();
+ EXPECT_EQ(Sample.LBR.size(), ExpectedSamples[NumSamples].size());
+
+ // Check the parsed LBREntries.
+ const auto *ActualIter = Sample.LBR.begin();
+ const auto *ExpectIter = ExpectedSamples[NumSamples].begin();
+ while (ActualIter != Sample.LBR.end() &&
+ ExpectIter != ExpectedSamples[NumSamples].end())
+ EXPECT_TRUE(checkLBREntry(*ActualIter++, *ExpectIter++));
+
+ ++NumSamples;
+ }
+ }
};
} // namespace bolt
@@ -113,6 +153,37 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
EXPECT_TRUE(checkEvents(1234, 10, {"branches-spe:"}));
}
+TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
+ // Check perf input with SPE branch events as brstack format.
+ // Example collection command:
+ // ```
+ // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+ // ```
+ // How Bolt extracts the branch events:
+ // ```
+ // perf script -F pid,brstack --itrace=bl
+ // ```
+
+ opts::ArmSPE = true;
+ opts::ReadPerfEvents = " 1234 0xa001/0xa002/PN/-/-/10/COND/-\n"
+ " 1234 0xb001/0xb002/P/-/-/4/RET/-\n"
+ " 1234 0xc001/0xc002/P/-/-/13/-/-\n"
+ " 1234 0xd001/0xd002/M/-/-/7/RET/-\n"
+ " 1234 0xe001/0xe002/P/-/-/14/RET/-\n"
+ " 1234 0xf001/0xf002/MN/-/-/8/COND/-\n";
+
+ LBREntry Entry1 = {0xa001, 0xa002, false};
+ LBREntry Entry2 = {0xb001, 0xb002, false};
+ LBREntry Entry3 = {0xc001, 0xc002, false};
+ LBREntry Entry4 = {0xd001, 0xd002, true};
+ LBREntry Entry5 = {0xe001, 0xe002, false};
+ LBREntry Entry6 = {0xf001, 0xf002, true};
+ std::vector<SmallVector<LBREntry, 2>> ExpectedSamples = {
+ {{Entry1}}, {{Entry2}}, {{Entry3}}, {{Entry4}}, {{Entry5}}, {{Entry6}},
+ };
+ parseAndCheckBrstackEvents(1234, ExpectedSamples);
+}
+
TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
// Check perf input with SPE branch events and cycles.
// Example collection command:
>From 73eb436752c378b2b819746b3bd814b1595c7861 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Mon, 24 Mar 2025 13:54:04 +0100
Subject: [PATCH 6/9] Address reviewers
---
bolt/lib/Profile/DataAggregator.cpp | 27 ++++++++++++-------
.../test/perf2bolt/AArch64/perf2bolt-spe.test | 15 +++++------
bolt/unittests/Profile/PerfSpeEvents.cpp | 10 +++----
3 files changed, 27 insertions(+), 25 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 3bcb1a5098b7a..85fcf962f7712 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -182,13 +182,13 @@ void DataAggregator::start() {
if (opts::ArmSPE) {
if (!opts::BasicAggregation) {
- // pid from_ip to_ip predicted?
- // 12345 0x123/0x456/P/-/-/8/RET/-
- launchPerfProcess("SPE branch events", MainEventsPPI,
+ // pid from_ip to_ip predicted/missed not-taken?
+ // 12345 0x123/0x456/PN/-/-/8/RET/-
+ launchPerfProcess("SPE brstack events", MainEventsPPI,
"script -F pid,brstack --itrace=bl",
/*Wait = */ false);
} else {
- launchPerfProcess("SPE brstack events", MainEventsPPI,
+ launchPerfProcess("SPE branch events (non-lbr)", MainEventsPPI,
"script -F pid,event,ip,addr --itrace=i1i",
/*Wait = */ false);
}
@@ -1022,12 +1022,19 @@ ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
return EC;
StringRef MispredStr = MispredStrRes.get();
// SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
- bool ProperStrSize = (MispredStr.size() == 2 && opts::ArmSPE)
- ? (MispredStr[1] == 'N')
- : (MispredStr.size() == 1);
- if (!ProperStrSize ||
- (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
- reportError("expected single char for mispred bit");
+ bool ValidStrSize = opts::ArmSPE ?
+ MispredStr.size() >= 1 && MispredStr.size() <= 2 : MispredStr.size() == 1;
+ bool SpeTakenBitErr =
+ (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
+ bool PredictionBitErr =
+ !ValidStrSize ||
+ (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
+ if (SpeTakenBitErr)
+ reportError("expected 'N' as SPE prediction bit for a not-taken branch");
+ if (PredictionBitErr)
+ reportError("expected 'P', 'M' or '-' char as a prediction bit");
+
+ if (SpeTakenBitErr || PredictionBitErr) {
Diag << "Found: " << MispredStr << "\n";
return make_error_code(llvm::errc::io_error);
}
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
index d34a2c7994f72..95b8e205331a1 100644
--- a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -1,14 +1,13 @@
-## Check that Arm SPE mode is available on AArch64 with BasicAggregation.
+## Check that Arm SPE mode is available on AArch64.
REQUIRES: system-linux,perf,target=aarch64{{.*}}
-RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
-RUN: touch %t.empty.perf.data
-RUN: perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
+RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe 2> /dev/null
-CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job
+RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe --nl %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
-RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe
-RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-LBR
+RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-LBR
+
+CHECK-SPE-NO-LBR: PERF2BOLT: spawning perf job to read SPE branch events (non-lbr)
+CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE brstack events
-CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE branch events
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 448354b784f29..639afe4b65f4d 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -172,14 +172,10 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
" 1234 0xe001/0xe002/P/-/-/14/RET/-\n"
" 1234 0xf001/0xf002/MN/-/-/8/COND/-\n";
- LBREntry Entry1 = {0xa001, 0xa002, false};
- LBREntry Entry2 = {0xb001, 0xb002, false};
- LBREntry Entry3 = {0xc001, 0xc002, false};
- LBREntry Entry4 = {0xd001, 0xd002, true};
- LBREntry Entry5 = {0xe001, 0xe002, false};
- LBREntry Entry6 = {0xf001, 0xf002, true};
std::vector<SmallVector<LBREntry, 2>> ExpectedSamples = {
- {{Entry1}}, {{Entry2}}, {{Entry3}}, {{Entry4}}, {{Entry5}}, {{Entry6}},
+ {{{0xa001, 0xa002, false}}}, {{{0xb001, 0xb002, false}}},
+ {{{0xc001, 0xc002, false}}}, {{{0xd001, 0xd002, true}}},
+ {{{0xe001, 0xe002, false}}}, {{{0xf001, 0xf002, true}}},
};
parseAndCheckBrstackEvents(1234, ExpectedSamples);
}
>From 96e360c3be34a92bf5b25347169f1897ae1a14e1 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Thu, 10 Apr 2025 15:37:29 +0200
Subject: [PATCH 7/9] Fix format issue
---
bolt/lib/Profile/DataAggregator.cpp | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 85fcf962f7712..3e4fb4f8773e3 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1022,19 +1022,20 @@ ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
return EC;
StringRef MispredStr = MispredStrRes.get();
// SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
- bool ValidStrSize = opts::ArmSPE ?
- MispredStr.size() >= 1 && MispredStr.size() <= 2 : MispredStr.size() == 1;
+ bool ValidStrSize = opts::ArmSPE
+ ? MispredStr.size() >= 1 && MispredStr.size() <= 2
+ : MispredStr.size() == 1;
bool SpeTakenBitErr =
- (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
+ (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
bool PredictionBitErr =
- !ValidStrSize ||
- (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
+ !ValidStrSize ||
+ (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
if (SpeTakenBitErr)
reportError("expected 'N' as SPE prediction bit for a not-taken branch");
if (PredictionBitErr)
reportError("expected 'P', 'M' or '-' char as a prediction bit");
- if (SpeTakenBitErr || PredictionBitErr) {
+ if (SpeTakenBitErr || PredictionBitErr) {
Diag << "Found: " << MispredStr << "\n";
return make_error_code(llvm::errc::io_error);
}
>From f95ff734b9a93f77bc02f7bcecf9d81f7b60200e Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Tue, 15 Apr 2025 12:50:16 +0200
Subject: [PATCH 8/9] Fix typo
---
bolt/lib/Profile/DataAggregator.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 3e4fb4f8773e3..e3b11b2aa2ac1 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1611,7 +1611,7 @@ void DataAggregator::printBranchStacksDiagnostics(
std::error_code DataAggregator::parseBranchEvents() {
std::string BranchEventTypeStr =
- opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
+ !opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
outs() << "PERF2BOLT: " << BranchEventTypeStr << "...\n";
NamedRegionTimer T("parseBranch", "Parsing " + BranchEventTypeStr,
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
>From b1516a9d688fed835dce5efc614302649c3baf0e Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Thu, 29 May 2025 09:42:03 +0200
Subject: [PATCH 9/9] Removing dependency of the SPE BasicAggregation
The aim of this commit is to decouple the SPE BRStack and SPE BasicAggregation approaches,
based on the decision in issue #115333.
The BRStack change relies on the unit test logic which was introduced by
Paschalis Mpeis (ARM) in #120741. Since that logic is common to both aggregation
techniques, an essential part of it needs to be retained.
All tests related to BasicAggregation are removed.
Co-Authored-By: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
---
bolt/include/bolt/Profile/DataAggregator.h | 12 --
bolt/lib/Profile/DataAggregator.cpp | 131 ++----------------
.../test/perf2bolt/AArch64/perf2bolt-spe.test | 3 -
bolt/unittests/Profile/PerfSpeEvents.cpp | 96 -------------
4 files changed, 8 insertions(+), 234 deletions(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 499b44b0fc857..888c65d045c66 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -281,15 +281,6 @@ class DataAggregator : public DataReader {
/// and a PC
ErrorOr<PerfBasicSample> parseBasicSample();
- /// Parse an Arm SPE entry into the non-lbr format by generating two basic
- /// samples. The format of an input SPE entry is:
- /// ```
- /// PID EVENT-TYPE ADDR IP
- /// ```
- /// SPE branch events will have 'ADDR' set to a branch target address while
- /// other perf or SPE events will have it set to zero.
- ErrorOr<std::pair<PerfBasicSample, PerfBasicSample>> parseSpeAsBasicSamples();
-
/// Parse a single perf sample containing a PID associated with an IP and
/// address.
ErrorOr<PerfMemSample> parseMemSample();
@@ -336,9 +327,6 @@ class DataAggregator : public DataReader {
/// Process non-LBR events.
void processBasicEvents();
- /// Parse Arm SPE events into the non-LBR format.
- std::error_code parseSpeAsBasicEvents();
-
/// Parse the full output generated by perf script to report memory events.
std::error_code parseMemEvents();
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index e3b11b2aa2ac1..2133ca246a7de 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -181,17 +181,11 @@ void DataAggregator::start() {
findPerfExecutable();
if (opts::ArmSPE) {
- if (!opts::BasicAggregation) {
- // pid from_ip to_ip predicted/missed not-taken?
- // 12345 0x123/0x456/PN/-/-/8/RET/-
- launchPerfProcess("SPE brstack events", MainEventsPPI,
- "script -F pid,brstack --itrace=bl",
- /*Wait = */ false);
- } else {
- launchPerfProcess("SPE branch events (non-lbr)", MainEventsPPI,
- "script -F pid,event,ip,addr --itrace=i1i",
- /*Wait = */ false);
- }
+ // pid from_ip to_ip predicted/missed not-taken?
+ // 12345 0x123/0x456/PN/-/-/8/RET/-
+ launchPerfProcess("SPE brstack events", MainEventsPPI,
+ "script -F pid,brstack --itrace=bl",
+ /*Wait = */ false);
} else if (opts::BasicAggregation) {
launchPerfProcess("events without LBR", MainEventsPPI,
"script -F pid,event,ip",
@@ -464,20 +458,14 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;
- const Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
- "Cannot print 'addr' field.");
-
- auto ErrorCallback = [&NoData](int ReturnCode, StringRef ErrBuf) {
- if (opts::ArmSPE && NoData.match(ErrBuf)) {
- errs() << "PERF2BOLT-ERROR: perf data are incompatible for Arm SPE mode "
- "consumption. ADDR attribute is unset.\n";
- exit(1);
- }
+ auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
exit(1);
};
auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
+ Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
+ "Cannot print 'addr' field.");
if (!NoData.match(ErrBuf))
ErrorCallback(ReturnCode, ErrBuf);
};
@@ -524,7 +512,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
prepareToParse("events", MainEventsPPI, ErrorCallback);
if ((!opts::BasicAggregation && parseBranchEvents()) ||
- (opts::BasicAggregation && opts::ArmSPE && parseSpeAsBasicEvents()) ||
(opts::BasicAggregation && parseBasicEvents()))
errs() << "PERF2BOLT: failed to parse samples\n";
@@ -1159,68 +1146,6 @@ ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
return PerfBasicSample{Event.get(), Address};
}
-ErrorOr<
- std::pair<DataAggregator::PerfBasicSample, DataAggregator::PerfBasicSample>>
-DataAggregator::parseSpeAsBasicSamples() {
- while (checkAndConsumeFS()) {
- }
-
- ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
- if (std::error_code EC = PIDRes.getError())
- return EC;
-
- constexpr PerfBasicSample EmptySample = PerfBasicSample{StringRef(), 0};
- auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
- if (MMapInfoIter == BinaryMMapInfo.end()) {
- consumeRestOfLine();
- return std::make_pair(EmptySample, EmptySample);
- }
-
- while (checkAndConsumeFS()) {
- }
-
- ErrorOr<StringRef> Event = parseString(FieldSeparator);
- if (std::error_code EC = Event.getError())
- return EC;
-
- while (checkAndConsumeFS()) {
- }
-
- ErrorOr<uint64_t> AddrResTo = parseHexField(FieldSeparator);
- if (std::error_code EC = AddrResTo.getError())
- return EC;
-
- consumeAllRemainingFS();
-
- ErrorOr<uint64_t> AddrResFrom = parseHexField(FieldSeparator, true);
- if (std::error_code EC = AddrResFrom.getError())
- return EC;
-
- if (!checkAndConsumeNewLine()) {
- reportError("expected end of line");
- return make_error_code(llvm::errc::io_error);
- }
-
- auto genBasicSample = [&](uint64_t Address) {
- // When fed with non SPE branch events the target address will be null.
- // This is expected and ignored.
- if (Address == 0x0)
- return EmptySample;
-
- if (!BC->HasFixedLoadAddress)
- adjustAddress(Address, MMapInfoIter->second);
-
- return PerfBasicSample{Event.get(), Address};
- };
-
- // Show more meaningful event names on boltdata.
- if (Event->str() == "instructions:")
- Event = *AddrResTo != 0x0 ? "branches-spe:" : "instructions-spe:";
-
- return std::make_pair(genBasicSample(*AddrResFrom),
- genBasicSample(*AddrResTo));
-}
-
ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
PerfMemSample Res{0, 0};
@@ -1729,46 +1654,6 @@ std::error_code DataAggregator::parseBasicEvents() {
return std::error_code();
}
-std::error_code DataAggregator::parseSpeAsBasicEvents() {
- outs() << "PERF2BOLT: parsing SPE data as basic events (no LBR)...\n";
- NamedRegionTimer T("parseSPEBasic", "Parsing SPE as basic events",
- TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
- uint64_t NumSpeBranchSamples = 0;
-
- // Convert entries to one or two basic samples, depending on whether there is
- // branch target information.
- while (hasData()) {
- auto SamplePair = parseSpeAsBasicSamples();
- if (std::error_code EC = SamplePair.getError())
- return EC;
-
- auto registerSample = [this](const PerfBasicSample *Sample) {
- if (!Sample->PC)
- return;
-
- if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
- BF->setHasProfileAvailable();
-
- ++BasicSamples[Sample->PC];
- EventNames.insert(Sample->EventName);
- };
-
- if (SamplePair->first.PC != 0x0 && SamplePair->second.PC != 0x0)
- ++NumSpeBranchSamples;
-
- registerSample(&SamplePair->first);
- registerSample(&SamplePair->second);
- }
-
- if (NumSpeBranchSamples == 0)
- errs() << "PERF2BOLT-WARNING: no SPE branches found\n";
- else
- outs() << "PERF2BOLT: found " << NumSpeBranchSamples
- << " SPE branch sample pairs.\n";
-
- return std::error_code();
-}
-
void DataAggregator::processBasicEvents() {
outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
index 95b8e205331a1..11cb4b5b762d2 100644
--- a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -4,10 +4,7 @@ REQUIRES: system-linux,perf,target=aarch64{{.*}}
RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe 2> /dev/null
-RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe --nl %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
-
RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-LBR
-CHECK-SPE-NO-LBR: PERF2BOLT: spawning perf job to read SPE branch events (non-lbr)
CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE brstack events
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 639afe4b65f4d..f26f2d59db944 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -71,25 +71,6 @@ struct PerfSpeEventsTestHelper : public testing::Test {
std::unique_ptr<ObjectFile> ObjFile;
std::unique_ptr<BinaryContext> BC;
- /// Return true when the expected \p SampleSize profile data are generated and
- /// contain all the \p ExpectedEventNames.
- bool checkEvents(uint64_t PID, size_t SampleSize,
- const StringSet<> &ExpectedEventNames) {
- DataAggregator DA("<pseudo input>");
- DA.ParsingBuf = opts::ReadPerfEvents;
- DA.BC = BC.get();
- DataAggregator::MMapInfo MMap;
- DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
-
- DA.parseSpeAsBasicEvents();
-
- for (auto &EE : ExpectedEventNames)
- if (!DA.EventNames.contains(EE.first()))
- return false;
-
- return SampleSize == DA.BasicSamples.size();
- }
-
/// Compare LBREntries
bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
@@ -133,26 +114,6 @@ struct PerfSpeEventsTestHelper : public testing::Test {
} // namespace bolt
} // namespace llvm
-// Check that DataAggregator can parseSpeAsBasicEvents for branch events when
-// combined with other event types.
-
-TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
- // Check perf input with SPE branch events.
- // Example collection command:
- // ```
- // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
- // ```
-
- opts::ReadPerfEvents =
- "1234 instructions: a002 a001\n"
- "1234 instructions: b002 b001\n"
- "1234 instructions: c002 c001\n"
- "1234 instructions: d002 d001\n"
- "1234 instructions: e002 e001\n";
-
- EXPECT_TRUE(checkEvents(1234, 10, {"branches-spe:"}));
-}
-
TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
// Check perf input with SPE branch events as brstack format.
// Example collection command:
@@ -180,61 +141,4 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
parseAndCheckBrstackEvents(1234, ExpectedSamples);
}
-TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
- // Check perf input with SPE branch events and cycles.
- // Example collection command:
- // ```
- // perf record -e cycles:u -e 'arm_spe_0/branch_filter=1/u' -- BINARY
- // ```
-
- opts::ReadPerfEvents =
- "1234 instructions: a002 a001\n"
- "1234 cycles:u: 0 b001\n"
- "1234 cycles:u: 0 c001\n"
- "1234 instructions: d002 d001\n"
- "1234 instructions: e002 e001\n";
-
- EXPECT_TRUE(checkEvents(1234, 8, {"branches-spe:", "cycles:u:"}));
-}
-
-TEST_F(PerfSpeEventsTestHelper, SpeAnyEventAndCycles) {
- // Check perf input with any SPE event type and cycles.
- // Example collection command:
- // ```
- // perf record -e cycles:u -e 'arm_spe_0//u' -- BINARY
- // ```
-
- opts::ReadPerfEvents =
- "1234 cycles:u: 0 a001\n"
- "1234 cycles:u: 0 b001\n"
- "1234 instructions: 0 c001\n"
- "1234 instructions: 0 d001\n"
- "1234 instructions: e002 e001\n";
-
- EXPECT_TRUE(checkEvents(1234, 6,
- {"cycles:u:", "instructions-spe:", "branches-spe:"}));
-}
-
-TEST_F(PerfSpeEventsTestHelper, SpeNoBranchPairsRecorded) {
- // Check perf input that has no SPE branch pairs recorded.
- // Example collection command:
- // ```
- // perf record -e cycles:u -e 'arm_spe_0/load_filter=1,branch_filter=0/u' --
- // BINARY
- // ```
-
- testing::internal::CaptureStderr();
- opts::ReadPerfEvents =
- "1234 instructions: 0 a001\n"
- "1234 cycles:u: 0 b001\n"
- "1234 instructions: 0 c001\n"
- "1234 cycles:u: 0 d001\n"
- "1234 instructions: 0 e001\n";
-
- EXPECT_TRUE(checkEvents(1234, 5, {"instructions-spe:", "cycles:u:"}));
-
- std::string Stderr = testing::internal::GetCapturedStderr();
- EXPECT_EQ(Stderr, "PERF2BOLT-WARNING: no SPE branches found\n");
-}
-
#endif
More information about the llvm-commits
mailing list