[llvm] [perf2bolt] Improve heuristic to map in-process addresses to specific… (PR #109397)

Kristof Beyls via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 20 03:02:50 PDT 2024


https://github.com/kbeyls updated https://github.com/llvm/llvm-project/pull/109397

>From 74fe936b39190263fa1dcb199cb2e9d82a306f2c Mon Sep 17 00:00:00 2001
From: Kristof Beyls <kristof.beyls at arm.com>
Date: Thu, 19 Sep 2024 15:34:04 +0100
Subject: [PATCH 1/2] [perf2bolt] Improve heuristic to map in-process addresses
 to specific segments in Elf binary.

The heuristic is improved by also taking into account that only executable
segments should contain instructions.

Fixes #109384.
---
 bolt/include/bolt/Core/BinaryContext.h |  5 +++-
 bolt/lib/Core/BinaryContext.cpp        |  3 +++
 bolt/lib/Profile/DataAggregator.cpp    |  3 ++-
 bolt/lib/Rewrite/RewriteInstance.cpp   |  8 +++---
 bolt/unittests/Core/BinaryContext.cpp  | 35 ++++++++++++++++++++------
 5 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 5fb32a1ea67848..fb73890d6e976a 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -71,6 +71,7 @@ struct SegmentInfo {
   uint64_t FileOffset;        /// Offset in the file.
   uint64_t FileSize;          /// Size in file.
   uint64_t Alignment;         /// Alignment of the segment.
+  bool     IsExecutable;      /// Is the executable bit set on the Segment?
 
   void print(raw_ostream &OS) const {
     OS << "SegmentInfo { Address: 0x"
@@ -78,7 +79,9 @@ struct SegmentInfo {
        << Twine::utohexstr(Size) << ", FileOffset: 0x"
        << Twine::utohexstr(FileOffset) << ", FileSize: 0x"
        << Twine::utohexstr(FileSize) << ", Alignment: 0x"
-       << Twine::utohexstr(Alignment) << "}";
+       << Twine::utohexstr(Alignment)
+       << ", " << (IsExecutable?"x":" ")
+       << "}";
   };
 };
 
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index cd137f457c1bdc..1347047e1b7060 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2021,6 +2021,9 @@ BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
   // Find a segment with a matching file offset.
   for (auto &KV : SegmentMapInfo) {
     const SegmentInfo &SegInfo = KV.second;
+    // Only consider executable segments.
+    if (!SegInfo.IsExecutable)
+      continue;
     // FileOffset is got from perf event,
     // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
     // If the pagesize is not equal to SegInfo.Alignment.
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 813d825f8b570c..746b57cc1e4048 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2043,7 +2043,8 @@ std::error_code DataAggregator::parseMMapEvents() {
             // size of the mapping, but we know it should not exceed the segment
             // alignment value. Hence we are performing an approximate check.
             return SegInfo.Address >= MMapInfo.MMapAddress &&
-                   SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment;
+                   SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment &&
+                   SegInfo.IsExecutable;
           });
       if (!MatchFound) {
         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index adacb50dc167c8..3040ba589b036d 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -526,11 +526,9 @@ Error RewriteInstance::discoverStorage() {
       NextAvailableOffset = std::max(NextAvailableOffset,
                                      Phdr.p_offset + Phdr.p_filesz);
 
-      BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr,
-                                                     Phdr.p_memsz,
-                                                     Phdr.p_offset,
-                                                     Phdr.p_filesz,
-                                                     Phdr.p_align};
+      BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{
+          Phdr.p_vaddr,  Phdr.p_memsz, Phdr.p_offset,
+          Phdr.p_filesz, Phdr.p_align, ((Phdr.p_flags & ELF::PF_X) != 0)};
       if (BC->TheTriple->getArch() == llvm::Triple::x86_64 &&
           Phdr.p_vaddr >= BinaryContext::KernelStartX86_64)
         BC->IsLinuxKernel = true;
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
index 6c3288146b7905..6bbfc0da71c268 100644
--- a/bolt/unittests/Core/BinaryContext.cpp
+++ b/bolt/unittests/Core/BinaryContext.cpp
@@ -160,13 +160,13 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) {
 TEST_P(BinaryContextTester, BaseAddress) {
   // Check that  base address calculation is correct for a binary with the
   // following segment layout:
-  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000};
+  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000, true};
   BC->SegmentMapInfo[0x10e8d2b4] =
-      SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000};
+      SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000, true};
   BC->SegmentMapInfo[0x4a3bddc0] =
-      SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000};
+      SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000, true};
   BC->SegmentMapInfo[0x4b84d5e8] =
-      SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000};
+      SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000, true};
 
   std::optional<uint64_t> BaseAddress =
       BC->getBaseAddressForMapping(0x7f13f5556000, 0x10e8c000);
@@ -181,13 +181,13 @@ TEST_P(BinaryContextTester, BaseAddress2) {
   // Check that base address calculation is correct for a binary if the
   // alignment in ELF file are different from pagesize.
   // The segment layout is as follows:
-  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000};
+  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000, true};
   BC->SegmentMapInfo[0x31860] =
-      SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000};
+      SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000, true};
   BC->SegmentMapInfo[0x41c20] =
-      SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000};
+      SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000, true};
   BC->SegmentMapInfo[0x54e18] =
-      SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000};
+      SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000, true};
 
   std::optional<uint64_t> BaseAddress =
       BC->getBaseAddressForMapping(0xaaaaea444000, 0x21000);
@@ -197,3 +197,22 @@ TEST_P(BinaryContextTester, BaseAddress2) {
   BaseAddress = BC->getBaseAddressForMapping(0xaaaaea444000, 0x11000);
   ASSERT_FALSE(BaseAddress.has_value());
 }
+
+TEST_P(BinaryContextTester, BaseAddressSegmentsSmallerThanAlignment) {
+  // Check that the correct segment is used to compute the base address
+  // when multiple segments are close together in the ELF file (closer
+  // than the required alignment in the process space).
+  // See https://github.com/llvm/llvm-project/issues/109384
+  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x1d1c, 0, 0x1d1c, 0x10000, false};
+  BC->SegmentMapInfo[0x11d40] =
+      SegmentInfo{0x11d40, 0x11e0, 0x1d40, 0x11e0, 0x10000, true};
+  BC->SegmentMapInfo[0x22f20] =
+      SegmentInfo{0x22f20, 0x10e0, 0x2f20, 0x1f0, 0x10000, false};
+  BC->SegmentMapInfo[0x33110] =
+      SegmentInfo{0x33110, 0x89, 0x3110, 0x88, 0x10000, false};
+
+  std::optional<uint64_t> BaseAddress =
+      BC->getBaseAddressForMapping(0xaaaaaaab1000, 0x1000);
+  ASSERT_TRUE(BaseAddress.has_value());
+  ASSERT_EQ(*BaseAddress, 0xaaaaaaaa0000ULL);
+}
\ No newline at end of file

>From b007be54f6fe2e82d3c8d63224e51d9eb30535ff Mon Sep 17 00:00:00 2001
From: Kristof Beyls <kristof.beyls at arm.com>
Date: Fri, 20 Sep 2024 12:01:05 +0200
Subject: [PATCH 2/2] Fix formatting

---
 bolt/include/bolt/Core/BinaryContext.h | 10 ++++------
 bolt/unittests/Core/BinaryContext.cpp  |  3 ++-
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index fb73890d6e976a..08ce892054874c 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -71,16 +71,14 @@ struct SegmentInfo {
   uint64_t FileOffset;        /// Offset in the file.
   uint64_t FileSize;          /// Size in file.
   uint64_t Alignment;         /// Alignment of the segment.
-  bool     IsExecutable;      /// Is the executable bit set on the Segment?
+  bool IsExecutable;          /// Is the executable bit set on the Segment?
 
   void print(raw_ostream &OS) const {
-    OS << "SegmentInfo { Address: 0x"
-       << Twine::utohexstr(Address) << ", Size: 0x"
-       << Twine::utohexstr(Size) << ", FileOffset: 0x"
+    OS << "SegmentInfo { Address: 0x" << Twine::utohexstr(Address)
+       << ", Size: 0x" << Twine::utohexstr(Size) << ", FileOffset: 0x"
        << Twine::utohexstr(FileOffset) << ", FileSize: 0x"
        << Twine::utohexstr(FileSize) << ", Alignment: 0x"
-       << Twine::utohexstr(Alignment)
-       << ", " << (IsExecutable?"x":" ")
+       << Twine::utohexstr(Alignment) << ", " << (IsExecutable ? "x" : " ")
        << "}";
   };
 };
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
index 6bbfc0da71c268..05b898d34af56c 100644
--- a/bolt/unittests/Core/BinaryContext.cpp
+++ b/bolt/unittests/Core/BinaryContext.cpp
@@ -160,7 +160,8 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) {
 TEST_P(BinaryContextTester, BaseAddress) {
   // Check that  base address calculation is correct for a binary with the
   // following segment layout:
-  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000, true};
+  BC->SegmentMapInfo[0] =
+      SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000, true};
   BC->SegmentMapInfo[0x10e8d2b4] =
       SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000, true};
   BC->SegmentMapInfo[0x4a3bddc0] =



More information about the llvm-commits mailing list