[llvm] [BOLT] Fix heatmaps on large BOLTEd binaries. (PR #92815)
Paschalis Mpeis via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 03:11:52 PDT 2024
https://github.com/paschalis-mpeis updated https://github.com/llvm/llvm-project/pull/92815
>From 505e186842c02e718a127134b64c11ebc493c87a Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Tue, 27 Aug 2024 18:18:07 +0100
Subject: [PATCH 1/3] BOLT fails to read correctly the size of multi-segment
mmaps.
---
bolt/include/bolt/Profile/DataAggregator.h | 5 +
bolt/lib/Profile/DataAggregator.cpp | 13 +++
bolt/unittests/Core/CMakeLists.txt | 3 +
bolt/unittests/Core/MemoryMaps.cpp | 113 +++++++++++++++++++++
4 files changed, 134 insertions(+)
create mode 100644 bolt/unittests/Core/MemoryMaps.cpp
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6453b3070ceb8d..3c8fe57a1faf40 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -492,6 +492,11 @@ class DataAggregator : public DataReader {
/// and return a file name matching a given \p FileBuildID.
std::optional<StringRef> getFileNameForBuildID(StringRef FileBuildID);
+ /// Get a constant reference to the parsed binary mmap entries.
+ const std::unordered_map<uint64_t, MMapInfo> &getBinaryMMapInfo() {
+ return BinaryMMapInfo;
+ }
+
friend class YAMLProfileWriter;
};
} // namespace bolt
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 813d825f8b570c..ddfc34dfbf254a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -95,6 +95,12 @@ cl::opt<bool> ReadPreAggregated(
"pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
cl::cat(AggregatorCategory));
+cl::opt<std::string>
+ ReadPerfEvents("perf-script-events",
+ cl::desc("skip perf event collection by supplying a "
+ "perf-script output in a textual format"),
+ cl::cat(AggregatorCategory));
+
static cl::opt<bool>
TimeAggregator("time-aggr",
cl::desc("time BOLT aggregator"),
@@ -464,6 +470,13 @@ void DataAggregator::filterBinaryMMapInfo() {
int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
PerfProcessErrorCallbackTy Callback) {
+ if (!opts::ReadPerfEvents.empty()) {
+ dbgs() << "PERF2BOLT: using pre-processed perf events for '" << Name
+ << "' (perf-script-events)\n";
+ ParsingBuf = opts::ReadPerfEvents;
+ return 0;
+ }
+
std::string Error;
outs() << "PERF2BOLT: waiting for perf " << Name
<< " collection to finish...\n";
diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt
index bad7108dad0b7b..208cf6ced73585 100644
--- a/bolt/unittests/Core/CMakeLists.txt
+++ b/bolt/unittests/Core/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
add_bolt_unittest(CoreTests
BinaryContext.cpp
MCPlusBuilder.cpp
+ MemoryMaps.cpp
DynoStats.cpp
DISABLE_LLVM_LINK_LLVM_DYLIB
@@ -17,6 +18,8 @@ target_link_libraries(CoreTests
PRIVATE
LLVMBOLTCore
LLVMBOLTRewrite
+ LLVMBOLTProfile
+ LLVMTestingSupport
)
foreach (tgt ${BOLT_TARGETS_TO_BUILD})
diff --git a/bolt/unittests/Core/MemoryMaps.cpp b/bolt/unittests/Core/MemoryMaps.cpp
new file mode 100644
index 00000000000000..19c98c29ec8b4b
--- /dev/null
+++ b/bolt/unittests/Core/MemoryMaps.cpp
@@ -0,0 +1,113 @@
+//===- bolt/unittest/Core/MemoryMaps.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Profile/DataAggregator.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+using namespace bolt;
+
+namespace opts {
+extern cl::opt<std::string> ReadPerfEvents;
+} // namespace opts
+
+namespace {
+struct MemoryMapsTester : public testing::TestWithParam<Triple::ArchType> {
+ void SetUp() override {
+ initalizeLLVM();
+ prepareElf();
+ initializeBOLT();
+ }
+
+protected:
+ void initalizeLLVM() {
+ llvm::InitializeAllTargetInfos();
+ llvm::InitializeAllTargetMCs();
+ llvm::InitializeAllAsmParsers();
+ llvm::InitializeAllDisassemblers();
+ llvm::InitializeAllTargets();
+ llvm::InitializeAllAsmPrinters();
+ }
+
+ void prepareElf() {
+ memcpy(ElfBuf, "\177ELF", 4);
+ ELF64LE::Ehdr *EHdr = reinterpret_cast<typename ELF64LE::Ehdr *>(ElfBuf);
+ EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64;
+ EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB;
+ EHdr->e_machine = GetParam() == Triple::aarch64 ? EM_AARCH64 : EM_X86_64;
+ MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF");
+ ObjFile = cantFail(ObjectFile::createObjectFile(Source));
+ }
+
+ void initializeBOLT() {
+ Relocation::Arch = ObjFile->makeTriple().getArch();
+ BC = cantFail(BinaryContext::createBinaryContext(
+ ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true,
+ DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()}));
+ ASSERT_FALSE(!BC);
+ }
+
+ char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {};
+ std::unique_ptr<ObjectFile> ObjFile;
+ std::unique_ptr<BinaryContext> BC;
+};
+} // namespace
+
+#ifdef X86_AVAILABLE
+
+INSTANTIATE_TEST_SUITE_P(X86, MemoryMapsTester,
+ ::testing::Values(Triple::x86_64));
+
+#endif
+
+#ifdef AARCH64_AVAILABLE
+
+INSTANTIATE_TEST_SUITE_P(AArch64, MemoryMapsTester,
+ ::testing::Values(Triple::aarch64));
+
+#endif
+
+/// Check that the correct mmap size is computed when we have multiple text
+/// segment mappings. Uses 'opts::ReadPerfEvents' flag to pass a custom 'perf
+/// script' output, along with two text segments (SegmentInfo).
+TEST_P(MemoryMapsTester, ParseMultipleSegments) {
+ const int Pid = 1234;
+ StringRef Filename = "BINARY";
+ opts::ReadPerfEvents = formatv(
+ "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: "
+ "[0xabc0000000(0x1000000) @ 0x11c0000 103:01 1573523 0]: r-xp {1}\n"
+ "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: "
+ "[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n",
+ Pid, Filename);
+
+ BC->SegmentMapInfo[0x11da000] =
+ SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000};
+ BC->SegmentMapInfo[0x31d0000] =
+ SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0000, 0x3000000, 0x200000};
+
+ DataAggregator DA("");
+ BC->setFilename(Filename);
+ Error Err = DA.preprocessProfile(*BC);
+
+ // Ignore errors from perf2bolt when parsing memory events later on.
+ ASSERT_THAT_ERROR(std::move(Err), Succeeded());
+
+ auto &BinaryMMapInfo = DA.getBinaryMMapInfo();
+ auto El = BinaryMMapInfo.find(Pid);
+ // Check that memory mapping is present and has the expected size.
+ ASSERT_NE(El, BinaryMMapInfo.end());
+ ASSERT_EQ(El->second.Size, static_cast<uint64_t>(0xb1d0000));
+}
>From d5a5aaa7601fe0cfd42642745d0a45f482613443 Mon Sep 17 00:00:00 2001
From: "Paschalis Mpeis (aws-mem-aarch64)" <paschalis.mpeis at arm.com>
Date: Wed, 24 Apr 2024 08:25:59 +0000
Subject: [PATCH 2/3] [BOLT] Fix heatmaps on large BOLTE'd binaries.
Large binaries get two text segments mapped when loaded in memory.
BOLT processes only the first, which is not having a correct BaseAddress,
causing a wrong computation of a BinaryMMapInfo's size.
Consequently, BOLT wrongly thinks that many of the samples fall outside
the binary and ignores them. As a result, the computed heatmap is
incomplete, and the section hotness statistics are wrong.
This bug is present in both the AArch64 and x86 backends.
---
This patch introduces flag 'perf-script-events' that allows passing
perf events without BOLT having to parse them using 'perf script'.
The flag is used to pass a mock perf profile that has two memory
mappings for a mock binary that has two text segments. The size of the
mapping is updated as `parseMMapEvents` now processes all text segments.
---
Example used in unit tests:
>From `/proc/<BINARY PID>/maps`, we have 2 text mappings, say A and B.
```
abc0000000-abc1000000 r-xp 011c0000 103:01 1573523 BINARY
abc2000000-abca000000 r-xp 031d0000 103:01 1573523 BINARY
```
Size of text mappings:
| Mapping | Size |
| ------- | ------ |
| A | ~15MB |
| B | ~135MB |
---
Example on a real program:
```
2f7200000-2fabca000 r--p 00000000 bolted-binary
2fabd9000-2fe47c000 r-xp 039c9000 bolted-binary <- 1st txt segment
2fe48b000-2fe61d000 r--p 0727b000 bolted-binary
2fe62c000-2fe660000 rw-p 0740c000 bolted-binary
2fe660000-2fea4c000 rw-p 00000000
2fec00000-303dad000 r-xp 07a00000 bolted-binary <- 2nd (appears only on the bolted binary)
```
---
bolt/lib/Profile/DataAggregator.cpp | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index ddfc34dfbf254a..1b4d355c7d064b 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2017,15 +2017,6 @@ std::error_code DataAggregator::parseMMapEvents() {
if (FileMMapInfo.first == "(deleted)")
continue;
- // Consider only the first mapping of the file for any given PID
- auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
- bool PIDExists = llvm::any_of(make_range(Range), [&](const auto &MI) {
- return MI.second.PID == FileMMapInfo.second.PID;
- });
-
- if (PIDExists)
- continue;
-
GlobalMMapInfo.insert(FileMMapInfo);
}
@@ -2076,12 +2067,18 @@ std::error_code DataAggregator::parseMMapEvents() {
<< " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
<< ". Ignoring profile data for this mapping\n";
continue;
- } else {
- MMapInfo.BaseAddress = *BaseAddress;
}
+ MMapInfo.BaseAddress = *BaseAddress;
}
+ // Try to add MMapInfo to the map and update its size. Large binaries
+ // may span multiple text segments, so the mapping is inserted only on the
+ // first occurrence. If a larger section size is found, it will be updated.
BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
+ uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size;
+ uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress;
+ if (Size > BinaryMMapInfo[MMapInfo.PID].Size)
+ BinaryMMapInfo[MMapInfo.PID].Size = Size;
}
if (BinaryMMapInfo.empty()) {
>From b1765dc9b9f2407b20562213c8ff0fe55cfb0e33 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Wed, 28 Aug 2024 16:08:47 +0100
Subject: [PATCH 3/3] Attempt to fix build bot failures.
- Added to CoreTests in BUILD.gn
- Hiding DataAggregator std out/err outputs
---
bolt/unittests/Core/MemoryMaps.cpp | 4 ++++
llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn | 1 +
2 files changed, 5 insertions(+)
diff --git a/bolt/unittests/Core/MemoryMaps.cpp b/bolt/unittests/Core/MemoryMaps.cpp
index 19c98c29ec8b4b..4329ff18af0441 100644
--- a/bolt/unittests/Core/MemoryMaps.cpp
+++ b/bolt/unittests/Core/MemoryMaps.cpp
@@ -98,6 +98,10 @@ TEST_P(MemoryMapsTester, ParseMultipleSegments) {
BC->SegmentMapInfo[0x31d0000] =
SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0000, 0x3000000, 0x200000};
+ // Dont show DataAggregators out/err output.
+ testing::internal::CaptureStdout();
+ testing::internal::CaptureStderr();
+
DataAggregator DA("");
BC->setFilename(Filename);
Error Err = DA.preprocessProfile(*BC);
diff --git a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn
index 51dc24481a513b..945d31afca10f0 100644
--- a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn
+++ b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn
@@ -15,6 +15,7 @@ unittest("CoreTests") {
"BinaryContext.cpp",
"DynoStats.cpp",
"MCPlusBuilder.cpp",
+ "MemoryMaps.cpp",
]
defines = []
More information about the llvm-commits
mailing list