[llvm-branch-commits] [llvm] Add initial support for SPE brstack format (PR #129231)
    Ádám Kallai via llvm-branch-commits 
    llvm-branch-commits at lists.llvm.org
       
    Thu Apr 10 06:38:05 PDT 2025
    
    
  
https://github.com/kaadam updated https://github.com/llvm/llvm-project/pull/129231
>From 93c958c3f016092c340e897aeabbb470e58b9dbb Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Wed, 19 Feb 2025 17:00:47 +0100
Subject: [PATCH 1/3] Add initial support for SPE brstack
Perf will be able to report SPE branch events in a format similar to the
one it uses for LBR brstack.
Therefore we can reuse the existing LBR parsing process for SPE as well.
Example of the SPE brstack input format:
perf script -i perf.data -F pid,brstack --itrace=bl
---
PID    FROM         TO           PREDICTED
---
16984  0x72e342e5f4/0x72e36192d0/M/-/-/11/RET/-
16984  0x72e7b8b3b4/0x72e7b8b3b8/PN/-/-/11/COND/-
16984  0x72e7b92b48/0x72e7b92b4c/PN/-/-/8/COND/-
16984  0x72eacc6b7c/0x760cc94b00/P/-/-/9/RET/-
16984  0x72e3f210fc/0x72e3f21068/P/-/-/4//-
16984  0x72e39b8c5c/0x72e3627b24/P/-/-/4//-
16984  0x72e7b89d20/0x72e7b92bbc/P/-/-/4/RET/-
The SPE brstack mispredicted flag might be two characters long: 'PN' or 'MN',
where 'N' means the branch was marked as NOT-TAKEN. This flag only applies to
conditional instructions (conditional branch or compare-and-branch);
it indicates that the instruction failed its condition code check.
Perf with 'brstack' support for SPE is available here:
```
https://github.com/Leo-Yan/linux/tree/perf_arm_spe_branch_flags_v2
```
Example of usage with SPE perf data:
```bash
perf2bolt -p perf.data -o perf.fdata --spe BINARY
```
Capture standard SPE branch events with perf:
```bash
perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
```
A unit test is also added to check the parsing of the 'SPE brstack format'.
---
 bolt/lib/Profile/DataAggregator.cpp           | 60 ++++++++++------
 .../test/perf2bolt/AArch64/perf2bolt-spe.test |  2 +-
 bolt/unittests/Profile/PerfSpeEvents.cpp      | 71 +++++++++++++++++++
 3 files changed, 109 insertions(+), 24 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index cce9fdbef99bd..4af3a493b8be6 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -49,12 +49,10 @@ static cl::opt<bool>
                      cl::desc("aggregate basic samples (without LBR info)"),
                      cl::cat(AggregatorCategory));
 
-cl::opt<bool> ArmSPE(
-    "spe",
-    cl::desc(
-        "Enable Arm SPE mode. Used in conjuction with no-lbr mode, ie `--spe "
-        "--nl`"),
-    cl::cat(AggregatorCategory));
+cl::opt<bool> ArmSPE("spe",
+                     cl::desc("Enable Arm SPE mode. Can combine with `--nl` "
+                              "to use in no-lbr mode"),
+                     cl::cat(AggregatorCategory));
 
 static cl::opt<std::string>
     ITraceAggregation("itrace",
@@ -180,13 +178,16 @@ void DataAggregator::start() {
 
   if (opts::ArmSPE) {
     if (!opts::BasicAggregation) {
-      errs() << "PERF2BOLT-ERROR: Arm SPE mode is combined only with "
-                "BasicAggregation.\n";
-      exit(1);
+      // pid    from_ip      to_ip        predicted?
+      // 12345  0x123/0x456/P/-/-/8/RET/-
+      launchPerfProcess("SPE branch events", MainEventsPPI,
+                        "script -F pid,brstack --itrace=bl",
+                        /*Wait = */ false);
+    } else {
+      launchPerfProcess("SPE brstack events", MainEventsPPI,
+                        "script -F pid,event,ip,addr --itrace=i1i",
+                        /*Wait = */ false);
     }
-    launchPerfProcess("branch events with SPE", MainEventsPPI,
-                      "script -F pid,event,ip,addr --itrace=i1i",
-                      /*Wait = */ false);
   } else if (opts::BasicAggregation) {
     launchPerfProcess("events without LBR", MainEventsPPI,
                       "script -F pid,event,ip",
@@ -527,8 +528,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
     }
     exit(0);
   }
-
-  if (((!opts::BasicAggregation && !opts::ArmSPE) && parseBranchEvents()) ||
+  if ((!opts::BasicAggregation && parseBranchEvents()) ||
       (opts::BasicAggregation && opts::ArmSPE && parseSpeAsBasicEvents()) ||
       (opts::BasicAggregation && parseBasicEvents()))
     errs() << "PERF2BOLT: failed to parse samples\n";
@@ -1034,7 +1034,11 @@ ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
   if (std::error_code EC = MispredStrRes.getError())
     return EC;
   StringRef MispredStr = MispredStrRes.get();
-  if (MispredStr.size() != 1 ||
+  // SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
+  bool ProperStrSize = (MispredStr.size() == 2 && opts::ArmSPE)
+                           ? (MispredStr[1] == 'N')
+                           : (MispredStr.size() == 1);
+  if (!ProperStrSize ||
       (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
     reportError("expected single char for mispred bit");
     Diag << "Found: " << MispredStr << "\n";
@@ -1565,9 +1569,11 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
 }
 
 std::error_code DataAggregator::parseBranchEvents() {
-  outs() << "PERF2BOLT: parse branch events...\n";
-  NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
-                     TimerGroupDesc, opts::TimeAggregator);
+  std::string BranchEventTypeStr =
+      opts::ArmSPE ? "branch events" : "SPE branch events in LBR-format";
+  outs() << "PERF2BOLT: " << BranchEventTypeStr << "...\n";
+  NamedRegionTimer T("parseBranch", "Parsing " + BranchEventTypeStr,
+                     TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
   uint64_t NumTotalSamples = 0;
   uint64_t NumEntries = 0;
@@ -1595,7 +1601,8 @@ std::error_code DataAggregator::parseBranchEvents() {
     }
 
     NumEntries += Sample.LBR.size();
-    if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
+    if (this->BC->isX86() && BAT && Sample.LBR.size() == 32 &&
+        !NeedsSkylakeFix) {
       errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
       NeedsSkylakeFix = true;
     }
@@ -1630,10 +1637,17 @@ std::error_code DataAggregator::parseBranchEvents() {
     if (NumSamples && NumSamplesNoLBR == NumSamples) {
       // Note: we don't know if perf2bolt is being used to parse memory samples
       // at this point. In this case, it is OK to parse zero LBRs.
-      errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
-                "LBR. Record profile with perf record -j any or run perf2bolt "
-                "in no-LBR mode with -nl (the performance improvement in -nl "
-                "mode may be limited)\n";
+      if (!opts::ArmSPE)
+        errs()
+            << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
+               "LBR. Record profile with perf record -j any or run perf2bolt "
+               "in no-LBR mode with -nl (the performance improvement in -nl "
+               "mode may be limited)\n";
+      else
+        errs()
+            << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
+               "SPE brstack entries. Record profile with:"
+               "perf record arm_spe_0/branch_filter=1/";
     } else {
       const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
       const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
index d7cea7ff769b8..d34a2c7994f72 100644
--- a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -11,4 +11,4 @@ CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job
 RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe
 RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-LBR
 
-CHECK-SPE-LBR: PERF2BOLT-ERROR: Arm SPE mode is combined only with BasicAggregation.
+CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE branch events
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index e52393b516fa3..448354b784f29 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -23,6 +23,7 @@ using namespace llvm::ELF;
 
 namespace opts {
 extern cl::opt<std::string> ReadPerfEvents;
+extern cl::opt<bool> ArmSPE;
 } // namespace opts
 
 namespace llvm {
@@ -88,6 +89,45 @@ struct PerfSpeEventsTestHelper : public testing::Test {
 
     return SampleSize == DA.BasicSamples.size();
   }
+
+  /// Compare LBREntries
+  bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
+    return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
+           Lhs.Mispred == Rhs.Mispred;
+  }
+
+  /// Parse and check SPE brstack as LBR
+  void parseAndCheckBrstackEvents(
+      uint64_t PID,
+      const std::vector<SmallVector<LBREntry, 2>> &ExpectedSamples) {
+    int NumSamples = 0;
+
+    DataAggregator DA("<pseudo input>");
+    DA.ParsingBuf = opts::ReadPerfEvents;
+    DA.BC = BC.get();
+    DataAggregator::MMapInfo MMap;
+    DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
+
+    // Process buffer.
+    while (DA.hasData()) {
+      ErrorOr<DataAggregator::PerfBranchSample> SampleRes =
+          DA.parseBranchSample();
+      if (std::error_code EC = SampleRes.getError())
+        EXPECT_NE(EC, std::errc::no_such_process);
+
+      DataAggregator::PerfBranchSample &Sample = SampleRes.get();
+      EXPECT_EQ(Sample.LBR.size(), ExpectedSamples[NumSamples].size());
+
+      // Check the parsed LBREntries.
+      const auto *ActualIter = Sample.LBR.begin();
+      const auto *ExpectIter = ExpectedSamples[NumSamples].begin();
+      while (ActualIter != Sample.LBR.end() &&
+             ExpectIter != ExpectedSamples[NumSamples].end())
+        EXPECT_TRUE(checkLBREntry(*ActualIter++, *ExpectIter++));
+
+      ++NumSamples;
+    }
+  }
 };
 
 } // namespace bolt
@@ -113,6 +153,37 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranches) {
   EXPECT_TRUE(checkEvents(1234, 10, {"branches-spe:"}));
 }
 
+TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
+  // Check perf input with SPE branch events as brstack format.
+  // Example collection command:
+  // ```
+  // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+  // ```
+  // How Bolt extracts the branch events:
+  // ```
+  // perf script -F pid,brstack --itrace=bl
+  // ```
+
+  opts::ArmSPE = true;
+  opts::ReadPerfEvents = "  1234  0xa001/0xa002/PN/-/-/10/COND/-\n"
+                         "  1234  0xb001/0xb002/P/-/-/4/RET/-\n"
+                         "  1234  0xc001/0xc002/P/-/-/13/-/-\n"
+                         "  1234  0xd001/0xd002/M/-/-/7/RET/-\n"
+                         "  1234  0xe001/0xe002/P/-/-/14/RET/-\n"
+                         "  1234  0xf001/0xf002/MN/-/-/8/COND/-\n";
+
+  LBREntry Entry1 = {0xa001, 0xa002, false};
+  LBREntry Entry2 = {0xb001, 0xb002, false};
+  LBREntry Entry3 = {0xc001, 0xc002, false};
+  LBREntry Entry4 = {0xd001, 0xd002, true};
+  LBREntry Entry5 = {0xe001, 0xe002, false};
+  LBREntry Entry6 = {0xf001, 0xf002, true};
+  std::vector<SmallVector<LBREntry, 2>> ExpectedSamples = {
+      {{Entry1}}, {{Entry2}}, {{Entry3}}, {{Entry4}}, {{Entry5}}, {{Entry6}},
+  };
+  parseAndCheckBrstackEvents(1234, ExpectedSamples);
+}
+
 TEST_F(PerfSpeEventsTestHelper, SpeBranchesAndCycles) {
   // Check perf input with SPE branch events and cycles.
   // Example collection command:
>From aec7a556fed56c72184963d21d6893e586d6a7e2 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Mon, 24 Mar 2025 13:54:04 +0100
Subject: [PATCH 2/3] Address reviewers
---
 bolt/lib/Profile/DataAggregator.cpp           | 27 ++++++++++++-------
 .../test/perf2bolt/AArch64/perf2bolt-spe.test | 15 +++++------
 bolt/unittests/Profile/PerfSpeEvents.cpp      | 10 +++----
 3 files changed, 27 insertions(+), 25 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 4af3a493b8be6..4273eda865c2a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -178,13 +178,13 @@ void DataAggregator::start() {
 
   if (opts::ArmSPE) {
     if (!opts::BasicAggregation) {
-      // pid    from_ip      to_ip        predicted?
-      // 12345  0x123/0x456/P/-/-/8/RET/-
-      launchPerfProcess("SPE branch events", MainEventsPPI,
+      // pid    from_ip      to_ip        predicted/missed not-taken?
+      // 12345  0x123/0x456/PN/-/-/8/RET/-
+      launchPerfProcess("SPE brstack events", MainEventsPPI,
                         "script -F pid,brstack --itrace=bl",
                         /*Wait = */ false);
     } else {
-      launchPerfProcess("SPE brstack events", MainEventsPPI,
+      launchPerfProcess("SPE branch events (non-lbr)", MainEventsPPI,
                         "script -F pid,event,ip,addr --itrace=i1i",
                         /*Wait = */ false);
     }
@@ -1035,12 +1035,19 @@ ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
     return EC;
   StringRef MispredStr = MispredStrRes.get();
   // SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
-  bool ProperStrSize = (MispredStr.size() == 2 && opts::ArmSPE)
-                           ? (MispredStr[1] == 'N')
-                           : (MispredStr.size() == 1);
-  if (!ProperStrSize ||
-      (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
-    reportError("expected single char for mispred bit");
+  bool ValidStrSize = opts::ArmSPE ?
+    MispredStr.size() >= 1 && MispredStr.size() <= 2 : MispredStr.size() == 1;
+  bool SpeTakenBitErr =
+         (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
+  bool PredictionBitErr =
+         !ValidStrSize ||
+         (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
+  if (SpeTakenBitErr)
+    reportError("expected 'N' as SPE prediction bit for a not-taken branch");
+  if (PredictionBitErr)
+    reportError("expected 'P', 'M' or '-' char as a prediction bit");
+
+ if (SpeTakenBitErr || PredictionBitErr) {
     Diag << "Found: " << MispredStr << "\n";
     return make_error_code(llvm::errc::io_error);
   }
diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
index d34a2c7994f72..95b8e205331a1 100644
--- a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
+++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test
@@ -1,14 +1,13 @@
-## Check that Arm SPE mode is available on AArch64 with BasicAggregation.
+## Check that Arm SPE mode is available on AArch64.
 
 REQUIRES: system-linux,perf,target=aarch64{{.*}}
 
-RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
-RUN: touch %t.empty.perf.data
-RUN: perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
+RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe 2> /dev/null
 
-CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job
+RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe --nl %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
 
-RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe
-RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-LBR
+RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-LBR
+
+CHECK-SPE-NO-LBR: PERF2BOLT: spawning perf job to read SPE branch events (non-lbr)
+CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE brstack events
 
-CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE branch events
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 448354b784f29..639afe4b65f4d 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -172,14 +172,10 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
                          "  1234  0xe001/0xe002/P/-/-/14/RET/-\n"
                          "  1234  0xf001/0xf002/MN/-/-/8/COND/-\n";
 
-  LBREntry Entry1 = {0xa001, 0xa002, false};
-  LBREntry Entry2 = {0xb001, 0xb002, false};
-  LBREntry Entry3 = {0xc001, 0xc002, false};
-  LBREntry Entry4 = {0xd001, 0xd002, true};
-  LBREntry Entry5 = {0xe001, 0xe002, false};
-  LBREntry Entry6 = {0xf001, 0xf002, true};
   std::vector<SmallVector<LBREntry, 2>> ExpectedSamples = {
-      {{Entry1}}, {{Entry2}}, {{Entry3}}, {{Entry4}}, {{Entry5}}, {{Entry6}},
+      {{{0xa001, 0xa002, false}}}, {{{0xb001, 0xb002, false}}},
+      {{{0xc001, 0xc002, false}}}, {{{0xd001, 0xd002, true}}},
+      {{{0xe001, 0xe002, false}}}, {{{0xf001, 0xf002, true}}},
   };
   parseAndCheckBrstackEvents(1234, ExpectedSamples);
 }
>From 2e0d8fecd4a72d9e9c52321dcb0a1057a44e771a Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Thu, 10 Apr 2025 15:37:29 +0200
Subject: [PATCH 3/3] Fix format issue
---
 bolt/lib/Profile/DataAggregator.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 4273eda865c2a..bcb3b2c8effb3 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1035,19 +1035,20 @@ ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
     return EC;
   StringRef MispredStr = MispredStrRes.get();
   // SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
-  bool ValidStrSize = opts::ArmSPE ?
-    MispredStr.size() >= 1 && MispredStr.size() <= 2 : MispredStr.size() == 1;
+  bool ValidStrSize = opts::ArmSPE
+                          ? MispredStr.size() >= 1 && MispredStr.size() <= 2
+                          : MispredStr.size() == 1;
   bool SpeTakenBitErr =
-         (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
+      (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
   bool PredictionBitErr =
-         !ValidStrSize ||
-         (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
+      !ValidStrSize ||
+      (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
   if (SpeTakenBitErr)
     reportError("expected 'N' as SPE prediction bit for a not-taken branch");
   if (PredictionBitErr)
     reportError("expected 'P', 'M' or '-' char as a prediction bit");
 
- if (SpeTakenBitErr || PredictionBitErr) {
+  if (SpeTakenBitErr || PredictionBitErr) {
     Diag << "Found: " << MispredStr << "\n";
     return make_error_code(llvm::errc::io_error);
   }
    
    
More information about the llvm-branch-commits
mailing list