[llvm] [perf2bolt] Improve heuristic to map in-process addresses to specific… (PR #109397)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 20 02:44:12 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Kristof Beyls (kbeyls)

<details>
<summary>Changes</summary>

… segments in Elf binary.

The heuristic is improved by also taking into account that only executable segments should contain instructions.

Fixes #<!-- -->109384.

---
Full diff: https://github.com/llvm/llvm-project/pull/109397.diff


5 Files Affected:

- (modified) bolt/include/bolt/Core/BinaryContext.h (+4-1) 
- (modified) bolt/lib/Core/BinaryContext.cpp (+3) 
- (modified) bolt/lib/Profile/DataAggregator.cpp (+2-1) 
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+3-5) 
- (modified) bolt/unittests/Core/BinaryContext.cpp (+27-8) 


``````````diff
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 5fb32a1ea67848..fb73890d6e976a 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -71,6 +71,7 @@ struct SegmentInfo {
   uint64_t FileOffset;        /// Offset in the file.
   uint64_t FileSize;          /// Size in file.
   uint64_t Alignment;         /// Alignment of the segment.
+  bool     IsExecutable;      /// Is the executable bit set on the Segment?
 
   void print(raw_ostream &OS) const {
     OS << "SegmentInfo { Address: 0x"
@@ -78,7 +79,9 @@ struct SegmentInfo {
        << Twine::utohexstr(Size) << ", FileOffset: 0x"
        << Twine::utohexstr(FileOffset) << ", FileSize: 0x"
        << Twine::utohexstr(FileSize) << ", Alignment: 0x"
-       << Twine::utohexstr(Alignment) << "}";
+       << Twine::utohexstr(Alignment)
+       << ", " << (IsExecutable?"x":" ")
+       << "}";
   };
 };
 
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index cd137f457c1bdc..1347047e1b7060 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2021,6 +2021,9 @@ BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
   // Find a segment with a matching file offset.
   for (auto &KV : SegmentMapInfo) {
     const SegmentInfo &SegInfo = KV.second;
+    // Only consider executable segments.
+    if (!SegInfo.IsExecutable)
+      continue;
     // FileOffset is got from perf event,
     // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
     // If the pagesize is not equal to SegInfo.Alignment.
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 813d825f8b570c..746b57cc1e4048 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2043,7 +2043,8 @@ std::error_code DataAggregator::parseMMapEvents() {
             // size of the mapping, but we know it should not exceed the segment
             // alignment value. Hence we are performing an approximate check.
             return SegInfo.Address >= MMapInfo.MMapAddress &&
-                   SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment;
+                   SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment &&
+                   SegInfo.IsExecutable;
           });
       if (!MatchFound) {
         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index adacb50dc167c8..3040ba589b036d 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -526,11 +526,9 @@ Error RewriteInstance::discoverStorage() {
       NextAvailableOffset = std::max(NextAvailableOffset,
                                      Phdr.p_offset + Phdr.p_filesz);
 
-      BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr,
-                                                     Phdr.p_memsz,
-                                                     Phdr.p_offset,
-                                                     Phdr.p_filesz,
-                                                     Phdr.p_align};
+      BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{
+          Phdr.p_vaddr,  Phdr.p_memsz, Phdr.p_offset,
+          Phdr.p_filesz, Phdr.p_align, ((Phdr.p_flags & ELF::PF_X) != 0)};
       if (BC->TheTriple->getArch() == llvm::Triple::x86_64 &&
           Phdr.p_vaddr >= BinaryContext::KernelStartX86_64)
         BC->IsLinuxKernel = true;
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
index 6c3288146b7905..6bbfc0da71c268 100644
--- a/bolt/unittests/Core/BinaryContext.cpp
+++ b/bolt/unittests/Core/BinaryContext.cpp
@@ -160,13 +160,13 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) {
 TEST_P(BinaryContextTester, BaseAddress) {
   // Check that  base address calculation is correct for a binary with the
   // following segment layout:
-  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000};
+  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000, true};
   BC->SegmentMapInfo[0x10e8d2b4] =
-      SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000};
+      SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000, true};
   BC->SegmentMapInfo[0x4a3bddc0] =
-      SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000};
+      SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000, true};
   BC->SegmentMapInfo[0x4b84d5e8] =
-      SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000};
+      SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000, true};
 
   std::optional<uint64_t> BaseAddress =
       BC->getBaseAddressForMapping(0x7f13f5556000, 0x10e8c000);
@@ -181,13 +181,13 @@ TEST_P(BinaryContextTester, BaseAddress2) {
   // Check that base address calculation is correct for a binary if the
   // alignment in ELF file are different from pagesize.
   // The segment layout is as follows:
-  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000};
+  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000, true};
   BC->SegmentMapInfo[0x31860] =
-      SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000};
+      SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000, true};
   BC->SegmentMapInfo[0x41c20] =
-      SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000};
+      SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000, true};
   BC->SegmentMapInfo[0x54e18] =
-      SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000};
+      SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000, true};
 
   std::optional<uint64_t> BaseAddress =
       BC->getBaseAddressForMapping(0xaaaaea444000, 0x21000);
@@ -197,3 +197,22 @@ TEST_P(BinaryContextTester, BaseAddress2) {
   BaseAddress = BC->getBaseAddressForMapping(0xaaaaea444000, 0x11000);
   ASSERT_FALSE(BaseAddress.has_value());
 }
+
+TEST_P(BinaryContextTester, BaseAddressSegmentsSmallerThanAlignment) {
+  // Check that the correct segment is used to compute the base address
+  // when multiple segments are close together in the ELF file (closer
+  // than the required alignment in the process space).
+  // See https://github.com/llvm/llvm-project/issues/109384
+  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x1d1c, 0, 0x1d1c, 0x10000, false};
+  BC->SegmentMapInfo[0x11d40] =
+      SegmentInfo{0x11d40, 0x11e0, 0x1d40, 0x11e0, 0x10000, true};
+  BC->SegmentMapInfo[0x22f20] =
+      SegmentInfo{0x22f20, 0x10e0, 0x2f20, 0x1f0, 0x10000, false};
+  BC->SegmentMapInfo[0x33110] =
+      SegmentInfo{0x33110, 0x89, 0x3110, 0x88, 0x10000, false};
+
+  std::optional<uint64_t> BaseAddress =
+      BC->getBaseAddressForMapping(0xaaaaaaab1000, 0x1000);
+  ASSERT_TRUE(BaseAddress.has_value());
+  ASSERT_EQ(*BaseAddress, 0xaaaaaaaa0000ULL);
+}
\ No newline at end of file

``````````

</details>


https://github.com/llvm/llvm-project/pull/109397


More information about the llvm-commits mailing list