[llvm] [BOLT] Fix heatmaps on large BOLTEd binaries. (PR #92815)

Paschalis Mpeis via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 29 03:11:52 PDT 2024


https://github.com/paschalis-mpeis updated https://github.com/llvm/llvm-project/pull/92815

>From 505e186842c02e718a127134b64c11ebc493c87a Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Tue, 27 Aug 2024 18:18:07 +0100
Subject: [PATCH 1/3] BOLT fails to read correctly the size of multi-segment
 mmaps.

---
 bolt/include/bolt/Profile/DataAggregator.h |   5 +
 bolt/lib/Profile/DataAggregator.cpp        |  13 +++
 bolt/unittests/Core/CMakeLists.txt         |   3 +
 bolt/unittests/Core/MemoryMaps.cpp         | 113 +++++++++++++++++++++
 4 files changed, 134 insertions(+)
 create mode 100644 bolt/unittests/Core/MemoryMaps.cpp

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6453b3070ceb8d..3c8fe57a1faf40 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -492,6 +492,11 @@ class DataAggregator : public DataReader {
   /// and return a file name matching a given \p FileBuildID.
   std::optional<StringRef> getFileNameForBuildID(StringRef FileBuildID);
 
+  /// Get a constant reference to the parsed binary mmap entries.
+  const std::unordered_map<uint64_t, MMapInfo> &getBinaryMMapInfo() {
+    return BinaryMMapInfo;
+  }
+
   friend class YAMLProfileWriter;
 };
 } // namespace bolt
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 813d825f8b570c..ddfc34dfbf254a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -95,6 +95,12 @@ cl::opt<bool> ReadPreAggregated(
     "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
     cl::cat(AggregatorCategory));
 
+cl::opt<std::string>
+    ReadPerfEvents("perf-script-events",
+                   cl::desc("skip perf event collection by supplying a "
+                            "perf-script output in a textual format"),
+                   cl::cat(AggregatorCategory));
+
 static cl::opt<bool>
 TimeAggregator("time-aggr",
   cl::desc("time BOLT aggregator"),
@@ -464,6 +470,13 @@ void DataAggregator::filterBinaryMMapInfo() {
 
 int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
                                    PerfProcessErrorCallbackTy Callback) {
+  if (!opts::ReadPerfEvents.empty()) {
+    dbgs() << "PERF2BOLT: using pre-processed perf events for '" << Name
+           << "' (perf-script-events)\n";
+    ParsingBuf = opts::ReadPerfEvents;
+    return 0;
+  }
+
   std::string Error;
   outs() << "PERF2BOLT: waiting for perf " << Name
          << " collection to finish...\n";
diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt
index bad7108dad0b7b..208cf6ced73585 100644
--- a/bolt/unittests/Core/CMakeLists.txt
+++ b/bolt/unittests/Core/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
 add_bolt_unittest(CoreTests
   BinaryContext.cpp
   MCPlusBuilder.cpp
+  MemoryMaps.cpp
   DynoStats.cpp
 
   DISABLE_LLVM_LINK_LLVM_DYLIB
@@ -17,6 +18,8 @@ target_link_libraries(CoreTests
   PRIVATE
   LLVMBOLTCore
   LLVMBOLTRewrite
+  LLVMBOLTProfile
+  LLVMTestingSupport
   )
 
 foreach (tgt ${BOLT_TARGETS_TO_BUILD})
diff --git a/bolt/unittests/Core/MemoryMaps.cpp b/bolt/unittests/Core/MemoryMaps.cpp
new file mode 100644
index 00000000000000..19c98c29ec8b4b
--- /dev/null
+++ b/bolt/unittests/Core/MemoryMaps.cpp
@@ -0,0 +1,113 @@
+//===- bolt/unittest/Core/MemoryMaps.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Profile/DataAggregator.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+using namespace bolt;
+
+namespace opts {
+extern cl::opt<std::string> ReadPerfEvents;
+} // namespace opts
+
+namespace {
+struct MemoryMapsTester : public testing::TestWithParam<Triple::ArchType> {
+  void SetUp() override {
+    initalizeLLVM();
+    prepareElf();
+    initializeBOLT();
+  }
+
+protected:
+  void initalizeLLVM() {
+    llvm::InitializeAllTargetInfos();
+    llvm::InitializeAllTargetMCs();
+    llvm::InitializeAllAsmParsers();
+    llvm::InitializeAllDisassemblers();
+    llvm::InitializeAllTargets();
+    llvm::InitializeAllAsmPrinters();
+  }
+
+  void prepareElf() {
+    memcpy(ElfBuf, "\177ELF", 4);
+    ELF64LE::Ehdr *EHdr = reinterpret_cast<typename ELF64LE::Ehdr *>(ElfBuf);
+    EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64;
+    EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB;
+    EHdr->e_machine = GetParam() == Triple::aarch64 ? EM_AARCH64 : EM_X86_64;
+    MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF");
+    ObjFile = cantFail(ObjectFile::createObjectFile(Source));
+  }
+
+  void initializeBOLT() {
+    Relocation::Arch = ObjFile->makeTriple().getArch();
+    BC = cantFail(BinaryContext::createBinaryContext(
+        ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true,
+        DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()}));
+    ASSERT_FALSE(!BC);
+  }
+
+  char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {};
+  std::unique_ptr<ObjectFile> ObjFile;
+  std::unique_ptr<BinaryContext> BC;
+};
+} // namespace
+
+#ifdef X86_AVAILABLE
+
+INSTANTIATE_TEST_SUITE_P(X86, MemoryMapsTester,
+                         ::testing::Values(Triple::x86_64));
+
+#endif
+
+#ifdef AARCH64_AVAILABLE
+
+INSTANTIATE_TEST_SUITE_P(AArch64, MemoryMapsTester,
+                         ::testing::Values(Triple::aarch64));
+
+#endif
+
+/// Check that the correct mmap size is computed when we have multiple text
+/// segment mappings. Uses 'opts::ReadPerfEvents' flag to pass a custom 'perf
+/// script' output, along with two text segments (SegmentInfo).
+TEST_P(MemoryMapsTester, ParseMultipleSegments) {
+  const int Pid = 1234;
+  StringRef Filename = "BINARY";
+  opts::ReadPerfEvents = formatv(
+      "name       0 [000]     0.000000: PERF_RECORD_MMAP2 {0}/{0}: "
+      "[0xabc0000000(0x1000000) @ 0x11c0000 103:01 1573523 0]: r-xp {1}\n"
+      "name       0 [000]     0.000000: PERF_RECORD_MMAP2 {0}/{0}: "
+      "[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n",
+      Pid, Filename);
+
+  BC->SegmentMapInfo[0x11da000] =
+      SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000};
+  BC->SegmentMapInfo[0x31d0000] =
+      SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0000, 0x3000000, 0x200000};
+
+  DataAggregator DA("");
+  BC->setFilename(Filename);
+  Error Err = DA.preprocessProfile(*BC);
+
+  // Ignore errors from perf2bolt when parsing memory events later on.
+  ASSERT_THAT_ERROR(std::move(Err), Succeeded());
+
+  auto &BinaryMMapInfo = DA.getBinaryMMapInfo();
+  auto El = BinaryMMapInfo.find(Pid);
+  // Check that memory mapping is present and has the expected size.
+  ASSERT_NE(El, BinaryMMapInfo.end());
+  ASSERT_EQ(El->second.Size, static_cast<uint64_t>(0xb1d0000));
+}

>From d5a5aaa7601fe0cfd42642745d0a45f482613443 Mon Sep 17 00:00:00 2001
From: "Paschalis Mpeis (aws-mem-aarch64)" <paschalis.mpeis at arm.com>
Date: Wed, 24 Apr 2024 08:25:59 +0000
Subject: [PATCH 2/3] [BOLT] Fix heatmaps on large BOLTE'd binaries.

Large binaries get two text segments mapped when loaded in memory.
BOLT processes only the first, which is not having a correct BaseAddress,
causing a wrong computation of a BinaryMMapInfo's size.

Consequently, BOLT wrongly thinks that many of the samples fall outside
the binary and ignores them. As a result, the computed heatmap is
incomplete, and the section hotness statistics are wrong.

This bug is present in both the AArch64 and x86 backends.

---

This patch introduces flag 'perf-script-events' that allows passing
perf events without BOLT having to parse them using 'perf script'.
The flag is used to pass a mock perf profile that has two memory
mappings for a mock binary that has two text segments. The size of the
mapping is updated as `parseMMapEvents` now processes all text segments.

---

Example used in unit tests:
>From `/proc/<BINARY PID>/maps`, we have 2 text mappings, say A and B.

```
abc0000000-abc1000000 r-xp 011c0000 103:01 1573523 BINARY
abc2000000-abca000000 r-xp 031d0000 103:01 1573523 BINARY
```

Size of text mappings:

| Mapping |  Size  |
| ------- | ------ |
| A       | ~15MB  |
| B       | ~135MB |

---

Example on a real program:
```
2f7200000-2fabca000 r--p 00000000        bolted-binary
2fabd9000-2fe47c000 r-xp 039c9000        bolted-binary <- 1st txt segment
2fe48b000-2fe61d000 r--p 0727b000        bolted-binary
2fe62c000-2fe660000 rw-p 0740c000        bolted-binary
2fe660000-2fea4c000 rw-p 00000000
2fec00000-303dad000 r-xp 07a00000        bolted-binary <- 2nd (appears only on the bolted binary)
```
---
 bolt/lib/Profile/DataAggregator.cpp | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index ddfc34dfbf254a..1b4d355c7d064b 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2017,15 +2017,6 @@ std::error_code DataAggregator::parseMMapEvents() {
     if (FileMMapInfo.first == "(deleted)")
       continue;
 
-    // Consider only the first mapping of the file for any given PID
-    auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
-    bool PIDExists = llvm::any_of(make_range(Range), [&](const auto &MI) {
-      return MI.second.PID == FileMMapInfo.second.PID;
-    });
-
-    if (PIDExists)
-      continue;
-
     GlobalMMapInfo.insert(FileMMapInfo);
   }
 
@@ -2076,12 +2067,18 @@ std::error_code DataAggregator::parseMMapEvents() {
                << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
                << ". Ignoring profile data for this mapping\n";
         continue;
-      } else {
-        MMapInfo.BaseAddress = *BaseAddress;
       }
+      MMapInfo.BaseAddress = *BaseAddress;
     }
 
+    // Try to add MMapInfo to the map and update its size. Large binaries
+    // may span multiple text segments, so the mapping is inserted only on the
+    // first occurrence. If a larger section size is found, it will be updated.
     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
+    uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size;
+    uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress;
+    if (Size > BinaryMMapInfo[MMapInfo.PID].Size)
+      BinaryMMapInfo[MMapInfo.PID].Size = Size;
   }
 
   if (BinaryMMapInfo.empty()) {

>From b1765dc9b9f2407b20562213c8ff0fe55cfb0e33 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Wed, 28 Aug 2024 16:08:47 +0100
Subject: [PATCH 3/3] Attempt to fix build bot failures.

- Added to CoreTests in BUILD.gn
- Hiding DataAggregator std out/err outputs
---
 bolt/unittests/Core/MemoryMaps.cpp                   | 4 ++++
 llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn | 1 +
 2 files changed, 5 insertions(+)

diff --git a/bolt/unittests/Core/MemoryMaps.cpp b/bolt/unittests/Core/MemoryMaps.cpp
index 19c98c29ec8b4b..4329ff18af0441 100644
--- a/bolt/unittests/Core/MemoryMaps.cpp
+++ b/bolt/unittests/Core/MemoryMaps.cpp
@@ -98,6 +98,10 @@ TEST_P(MemoryMapsTester, ParseMultipleSegments) {
   BC->SegmentMapInfo[0x31d0000] =
       SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0000, 0x3000000, 0x200000};
 
+  // Dont show DataAggregators out/err output.
+  testing::internal::CaptureStdout();
+  testing::internal::CaptureStderr();
+
   DataAggregator DA("");
   BC->setFilename(Filename);
   Error Err = DA.preprocessProfile(*BC);
diff --git a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn
index 51dc24481a513b..945d31afca10f0 100644
--- a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn
+++ b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn
@@ -15,6 +15,7 @@ unittest("CoreTests") {
     "BinaryContext.cpp",
     "DynoStats.cpp",
     "MCPlusBuilder.cpp",
+    "MemoryMaps.cpp",
   ]
 
   defines = []



More information about the llvm-commits mailing list