[llvm] [BOLT] Decouple new segment creation from PHDR rewrite (PR #146111)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 27 09:39:08 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Maksim Panchenko (maksfb)

<details>
<summary>Changes</summary>

Refactor handling of PHDR table rewrite to make modifications easier.

---
Full diff: https://github.com/llvm/llvm-project/pull/146111.diff


5 Files Affected:

- (modified) bolt/include/bolt/Core/BinaryContext.h (+8-2) 
- (modified) bolt/include/bolt/Rewrite/RewriteInstance.h (+3) 
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+102-100) 
- (modified) bolt/unittests/Core/BinaryContext.cpp (+17-15) 
- (modified) bolt/unittests/Core/MemoryMaps.cpp (+8-8) 


``````````diff
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 64e524e2f13d7..91ecf89da618c 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -73,14 +73,15 @@ struct SegmentInfo {
   uint64_t FileSize;          /// Size in file.
   uint64_t Alignment;         /// Alignment of the segment.
   bool IsExecutable;          /// Is the executable bit set on the Segment?
+  bool IsWritable;            /// Is the segment writable.
 
   void print(raw_ostream &OS) const {
     OS << "SegmentInfo { Address: 0x" << Twine::utohexstr(Address)
        << ", Size: 0x" << Twine::utohexstr(Size) << ", FileOffset: 0x"
        << Twine::utohexstr(FileOffset) << ", FileSize: 0x"
        << Twine::utohexstr(FileSize) << ", Alignment: 0x"
-       << Twine::utohexstr(Alignment) << ", " << (IsExecutable ? "x" : " ")
-       << "}";
+       << Twine::utohexstr(Alignment) << ", " << (IsExecutable ? "x" : "")
+       << (IsWritable ? "w" : "") << " }";
   };
 };
 
@@ -333,9 +334,14 @@ class BinaryContext {
                                   std::optional<StringRef> Source,
                                   unsigned CUID, unsigned DWARFVersion);
 
+  /// Input file segment info
+  ///
   /// [start memory address] -> [segment info] mapping.
   std::map<uint64_t, SegmentInfo> SegmentMapInfo;
 
+  /// Newly created segments.
+  std::vector<SegmentInfo> NewSegments;
+
   /// Symbols that are expected to be undefined in MCContext during emission.
   std::unordered_set<MCSymbol *> UndefinedSymbols;
 
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 94dd06e91d6b1..3556d0b0d76ee 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -297,6 +297,9 @@ class RewriteInstance {
     return FUNC(ELF64BE);                                                      \
   }
 
+  /// Update loadable segment information based on new sections.
+  void updateSegmentInfo();
+
   /// Patch ELF book-keeping info.
   void patchELFPHDRTable();
 
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 934768c244b31..6f733d3e31c5e 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -547,9 +547,14 @@ Error RewriteInstance::discoverStorage() {
       NextAvailableOffset = std::max(NextAvailableOffset,
                                      Phdr.p_offset + Phdr.p_filesz);
 
-      BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{
-          Phdr.p_vaddr,  Phdr.p_memsz, Phdr.p_offset,
-          Phdr.p_filesz, Phdr.p_align, ((Phdr.p_flags & ELF::PF_X) != 0)};
+      BC->SegmentMapInfo[Phdr.p_vaddr] =
+          SegmentInfo{Phdr.p_vaddr,
+                      Phdr.p_memsz,
+                      Phdr.p_offset,
+                      Phdr.p_filesz,
+                      Phdr.p_align,
+                      (Phdr.p_flags & ELF::PF_X) != 0,
+                      (Phdr.p_flags & ELF::PF_W) != 0};
       if (BC->TheTriple->getArch() == llvm::Triple::x86_64 &&
           Phdr.p_vaddr >= BinaryContext::KernelStartX86_64)
         BC->IsLinuxKernel = true;
@@ -4155,6 +4160,74 @@ void RewriteInstance::updateOutputValues(const BOLTLinker &Linker) {
     Function->updateOutputValues(Linker);
 }
 
+void RewriteInstance::updateSegmentInfo() {
+  // NOTE Currently .eh_frame_hdr appends to the last segment, recalculate
+  // last segments size based on the NextAvailableAddress variable.
+  if (!NewWritableSegmentSize) {
+    if (NewTextSegmentAddress)
+      NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
+  } else {
+    NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress;
+  }
+
+  if (NewTextSegmentSize) {
+    SegmentInfo TextSegment = {NewTextSegmentAddress,
+                               NewTextSegmentSize,
+                               NewTextSegmentOffset,
+                               NewTextSegmentSize,
+                               BC->PageAlign,
+                               true,
+                               false};
+    if (!opts::Instrument) {
+      BC->NewSegments.push_back(TextSegment);
+    } else {
+      ErrorOr<BinarySection &> Sec =
+          BC->getUniqueSectionByName(".bolt.instr.counters");
+      assert(Sec && "expected one and only one `.bolt.instr.counters` section");
+      const uint64_t Addr = Sec->getOutputAddress();
+      const uint64_t Offset = Sec->getOutputFileOffset();
+      const uint64_t Size = Sec->getOutputSize();
+      assert(Addr > TextSegment.Address &&
+             Addr + Size < TextSegment.Address + TextSegment.Size &&
+             "`.bolt.instr.counters` section is expected to be included in the "
+             "new text segment");
+
+      // Set correct size for the previous header since we are breaking the
+      // new text segment into three segments.
+      uint64_t Delta = Addr - TextSegment.Address;
+      TextSegment.Size = Delta;
+      TextSegment.FileSize = Delta;
+      BC->NewSegments.push_back(TextSegment);
+
+      // Create RW segment that includes the `.bolt.instr.counters` section.
+      SegmentInfo RWSegment = {Addr,  Size, Offset, Size, BC->RegularPageSize,
+                               false, true};
+      BC->NewSegments.push_back(RWSegment);
+
+      // Create RX segment that includes all RX sections from runtime library.
+      const uint64_t AddrRX = alignTo(Addr + Size, BC->RegularPageSize);
+      const uint64_t OffsetRX = alignTo(Offset + Size, BC->RegularPageSize);
+      const uint64_t SizeRX =
+          NewTextSegmentSize - (AddrRX - TextSegment.Address);
+      SegmentInfo RXSegment = {
+          AddrRX, SizeRX, OffsetRX, SizeRX, BC->RegularPageSize, true, false};
+      BC->NewSegments.push_back(RXSegment);
+    }
+  }
+
+  if (NewWritableSegmentSize) {
+    SegmentInfo DataSegmentInfo = {
+        NewWritableSegmentAddress,
+        NewWritableSegmentSize,
+        getFileOffsetForAddress(NewWritableSegmentAddress),
+        NewWritableSegmentSize,
+        BC->RegularPageSize,
+        false,
+        true};
+    BC->NewSegments.push_back(DataSegmentInfo);
+  }
+}
+
 void RewriteInstance::patchELFPHDRTable() {
   auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
   const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
@@ -4181,16 +4254,7 @@ void RewriteInstance::patchELFPHDRTable() {
   if (opts::Instrument)
     Phnum += 2;
 
-  // NOTE Currently .eh_frame_hdr appends to the last segment, recalculate
-  // last segments size based on the NextAvailableAddress variable.
-  if (!NewWritableSegmentSize) {
-    if (NewTextSegmentAddress)
-      NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
-  } else {
-    NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress;
-  }
-
-  if (!NewTextSegmentSize && !NewWritableSegmentSize) {
+  if (BC->NewSegments.empty()) {
     BC->outs() << "BOLT-INFO: not adding new segments\n";
     return;
   }
@@ -4198,90 +4262,28 @@ void RewriteInstance::patchELFPHDRTable() {
   const uint64_t SavedPos = OS.tell();
   OS.seek(PHDRTableOffset);
 
-  auto createNewPhdrs = [&]() {
-    SmallVector<ELF64LEPhdrTy, 3> NewPhdrs;
-    ELF64LEPhdrTy NewPhdr;
-    NewPhdr.p_type = ELF::PT_LOAD;
-    NewPhdr.p_offset = NewTextSegmentOffset;
-    NewPhdr.p_vaddr = NewTextSegmentAddress;
-    NewPhdr.p_paddr = NewTextSegmentAddress;
-    NewPhdr.p_filesz = NewTextSegmentSize;
-    NewPhdr.p_memsz = NewTextSegmentSize;
-    NewPhdr.p_flags = ELF::PF_X | ELF::PF_R;
-    NewPhdr.p_align = BC->PageAlign;
-
-    if (!opts::Instrument) {
-      NewPhdrs.push_back(NewPhdr);
-    } else {
-      ErrorOr<BinarySection &> Sec =
-          BC->getUniqueSectionByName(".bolt.instr.counters");
-      assert(Sec && "expected one and only one `.bolt.instr.counters` section");
-      const uint64_t Addr = Sec->getOutputAddress();
-      const uint64_t Offset = Sec->getOutputFileOffset();
-      const uint64_t Size = Sec->getOutputSize();
-      assert(Addr > NewPhdr.p_vaddr &&
-             Addr + Size < NewPhdr.p_vaddr + NewPhdr.p_memsz &&
-             "`.bolt.instr.counters` section is expected to be included in the "
-             "new text sgement");
-
-      // Set correct size for the previous header since we are breaking the
-      // new text segment into three segments.
-      uint64_t Delta = Addr - NewPhdr.p_vaddr;
-      NewPhdr.p_filesz = Delta;
-      NewPhdr.p_memsz = Delta;
-      NewPhdrs.push_back(NewPhdr);
-
-      // Create a program header for a RW segment that includes the
-      // `.bolt.instr.counters` section only.
-      ELF64LEPhdrTy NewPhdrRWSegment;
-      NewPhdrRWSegment.p_type = ELF::PT_LOAD;
-      NewPhdrRWSegment.p_offset = Offset;
-      NewPhdrRWSegment.p_vaddr = Addr;
-      NewPhdrRWSegment.p_paddr = Addr;
-      NewPhdrRWSegment.p_filesz = Size;
-      NewPhdrRWSegment.p_memsz = Size;
-      NewPhdrRWSegment.p_flags = ELF::PF_R | ELF::PF_W;
-      NewPhdrRWSegment.p_align = BC->RegularPageSize;
-      NewPhdrs.push_back(NewPhdrRWSegment);
-
-      // Create a program header for a RX segment that includes all the RX
-      // sections from runtime library.
-      ELF64LEPhdrTy NewPhdrRXSegment;
-      NewPhdrRXSegment.p_type = ELF::PT_LOAD;
-      const uint64_t AddrRX = alignTo(Addr + Size, BC->RegularPageSize);
-      const uint64_t OffsetRX = alignTo(Offset + Size, BC->RegularPageSize);
-      const uint64_t SizeRX = NewTextSegmentSize - (AddrRX - NewPhdr.p_paddr);
-      NewPhdrRXSegment.p_offset = OffsetRX;
-      NewPhdrRXSegment.p_vaddr = AddrRX;
-      NewPhdrRXSegment.p_paddr = AddrRX;
-      NewPhdrRXSegment.p_filesz = SizeRX;
-      NewPhdrRXSegment.p_memsz = SizeRX;
-      NewPhdrRXSegment.p_flags = ELF::PF_X | ELF::PF_R;
-      NewPhdrRXSegment.p_align = BC->RegularPageSize;
-      NewPhdrs.push_back(NewPhdrRXSegment);
-    }
-
-    return NewPhdrs;
+  auto createPhdr = [](const SegmentInfo &SI) {
+    ELF64LEPhdrTy Phdr;
+    Phdr.p_type = ELF::PT_LOAD;
+    Phdr.p_offset = SI.FileOffset;
+    Phdr.p_vaddr = SI.Address;
+    Phdr.p_paddr = SI.Address;
+    Phdr.p_filesz = SI.FileSize;
+    Phdr.p_memsz = SI.Size;
+    Phdr.p_flags = ELF::PF_R;
+    if (SI.IsExecutable)
+      Phdr.p_flags |= ELF::PF_X;
+    if (SI.IsWritable)
+      Phdr.p_flags |= ELF::PF_W;
+    Phdr.p_align = SI.Alignment;
+
+    return Phdr;
   };
 
   auto writeNewSegmentPhdrs = [&]() {
-    if (NewTextSegmentSize) {
-      SmallVector<ELF64LE::Phdr, 3> NewPhdrs = createNewPhdrs();
-      OS.write(reinterpret_cast<const char *>(NewPhdrs.data()),
-               sizeof(ELF64LE::Phdr) * NewPhdrs.size());
-    }
-
-    if (NewWritableSegmentSize) {
-      ELF64LEPhdrTy NewPhdr;
-      NewPhdr.p_type = ELF::PT_LOAD;
-      NewPhdr.p_offset = getFileOffsetForAddress(NewWritableSegmentAddress);
-      NewPhdr.p_vaddr = NewWritableSegmentAddress;
-      NewPhdr.p_paddr = NewWritableSegmentAddress;
-      NewPhdr.p_filesz = NewWritableSegmentSize;
-      NewPhdr.p_memsz = NewWritableSegmentSize;
-      NewPhdr.p_align = BC->RegularPageSize;
-      NewPhdr.p_flags = ELF::PF_R | ELF::PF_W;
-      OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr));
+    for (const SegmentInfo &SI : BC->NewSegments) {
+      ELF64LEPhdrTy Phdr = createPhdr(SI);
+      OS.write(reinterpret_cast<const char *>(&Phdr), sizeof(Phdr));
     }
   };
 
@@ -4317,11 +4319,9 @@ void RewriteInstance::patchELFPHDRTable() {
     case ELF::PT_GNU_STACK:
       if (opts::UseGnuStack) {
         // Overwrite the header with the new segment header.
-        assert(!opts::Instrument);
-        SmallVector<ELF64LE::Phdr, 3> NewPhdrs = createNewPhdrs();
-        assert(NewPhdrs.size() == 1 &&
-               "expect exactly one program header was created");
-        NewPhdr = NewPhdrs[0];
+        assert(BC->NewSegments.size() == 1 &&
+               "Expected exactly one new segment");
+        NewPhdr = createPhdr(BC->NewSegments.front());
         ModdedGnuStack = true;
       }
       break;
@@ -5946,8 +5946,10 @@ void RewriteInstance::rewriteFile() {
     addBATSection();
 
   // Patch program header table.
-  if (!BC->IsLinuxKernel)
+  if (!BC->IsLinuxKernel) {
+    updateSegmentInfo();
     patchELFPHDRTable();
+  }
 
   // Finalize memory image of section string table.
   finalizeSectionStringTable();
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
index 3fa0851d01e5a..d7374b323c916 100644
--- a/bolt/unittests/Core/BinaryContext.cpp
+++ b/bolt/unittests/Core/BinaryContext.cpp
@@ -199,13 +199,13 @@ TEST_P(BinaryContextTester, BaseAddress) {
   // Check that  base address calculation is correct for a binary with the
   // following segment layout:
   BC->SegmentMapInfo[0] =
-      SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000, true};
-  BC->SegmentMapInfo[0x10e8d2b4] =
-      SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000, true};
-  BC->SegmentMapInfo[0x4a3bddc0] =
-      SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000, true};
-  BC->SegmentMapInfo[0x4b84d5e8] =
-      SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000, true};
+      SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000, true, false};
+  BC->SegmentMapInfo[0x10e8d2b4] = SegmentInfo{
+      0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000, true, false};
+  BC->SegmentMapInfo[0x4a3bddc0] = SegmentInfo{
+      0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000, true, false};
+  BC->SegmentMapInfo[0x4b84d5e8] = SegmentInfo{
+      0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000, true, false};
 
   std::optional<uint64_t> BaseAddress =
       BC->getBaseAddressForMapping(0x7f13f5556000, 0x10e8c000);
@@ -220,13 +220,14 @@ TEST_P(BinaryContextTester, BaseAddress2) {
   // Check that base address calculation is correct for a binary if the
   // alignment in ELF file are different from pagesize.
   // The segment layout is as follows:
-  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000, true};
+  BC->SegmentMapInfo[0] =
+      SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000, true, false};
   BC->SegmentMapInfo[0x31860] =
-      SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000, true};
+      SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000, true, false};
   BC->SegmentMapInfo[0x41c20] =
-      SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000, true};
+      SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000, true, false};
   BC->SegmentMapInfo[0x54e18] =
-      SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000, true};
+      SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000, true, false};
 
   std::optional<uint64_t> BaseAddress =
       BC->getBaseAddressForMapping(0xaaaaea444000, 0x21000);
@@ -242,13 +243,14 @@ TEST_P(BinaryContextTester, BaseAddressSegmentsSmallerThanAlignment) {
   // when multiple segments are close together in the ELF file (closer
   // than the required alignment in the process space).
   // See https://github.com/llvm/llvm-project/issues/109384
-  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x1d1c, 0, 0x1d1c, 0x10000, false};
+  BC->SegmentMapInfo[0] =
+      SegmentInfo{0, 0x1d1c, 0, 0x1d1c, 0x10000, false, false};
   BC->SegmentMapInfo[0x11d40] =
-      SegmentInfo{0x11d40, 0x11e0, 0x1d40, 0x11e0, 0x10000, true};
+      SegmentInfo{0x11d40, 0x11e0, 0x1d40, 0x11e0, 0x10000, true, false};
   BC->SegmentMapInfo[0x22f20] =
-      SegmentInfo{0x22f20, 0x10e0, 0x2f20, 0x1f0, 0x10000, false};
+      SegmentInfo{0x22f20, 0x10e0, 0x2f20, 0x1f0, 0x10000, false, false};
   BC->SegmentMapInfo[0x33110] =
-      SegmentInfo{0x33110, 0x89, 0x3110, 0x88, 0x10000, false};
+      SegmentInfo{0x33110, 0x89, 0x3110, 0x88, 0x10000, false, false};
 
   std::optional<uint64_t> BaseAddress =
       BC->getBaseAddressForMapping(0xaaaaaaab1000, 0x1000);
diff --git a/bolt/unittests/Core/MemoryMaps.cpp b/bolt/unittests/Core/MemoryMaps.cpp
index b0cab5431bdd3..8eb8f8ae529b1 100644
--- a/bolt/unittests/Core/MemoryMaps.cpp
+++ b/bolt/unittests/Core/MemoryMaps.cpp
@@ -100,10 +100,10 @@ TEST_P(MemoryMapsTester, ParseMultipleSegments) {
       "[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n",
       Pid, Filename);
 
-  BC->SegmentMapInfo[0x11da000] =
-      SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true};
-  BC->SegmentMapInfo[0x31d0000] =
-      SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0000, 0x3000000, 0x200000, true};
+  BC->SegmentMapInfo[0x11da000] = SegmentInfo{
+      0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true, false};
+  BC->SegmentMapInfo[0x31d0000] = SegmentInfo{
+      0x31d0000, 0x51ac82c, 0x31d0000, 0x3000000, 0x200000, true, false};
 
   DataAggregator DA("");
   BC->setFilename(Filename);
@@ -131,12 +131,12 @@ TEST_P(MemoryMapsTester, MultipleSegmentsMismatchedBaseAddress) {
       "[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n",
       Pid, Filename);
 
-  BC->SegmentMapInfo[0x11da000] =
-      SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true};
+  BC->SegmentMapInfo[0x11da000] = SegmentInfo{
+      0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true, false};
   // Using '0x31d0fff' FileOffset which triggers a different base address
   // for this second text segment.
-  BC->SegmentMapInfo[0x31d0000] =
-      SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0fff, 0x3000000, 0x200000, true};
+  BC->SegmentMapInfo[0x31d0000] = SegmentInfo{
+      0x31d0000, 0x51ac82c, 0x31d0fff, 0x3000000, 0x200000, true, false};
 
   DataAggregator DA("");
   BC->setFilename(Filename);

``````````

</details>


https://github.com/llvm/llvm-project/pull/146111


More information about the llvm-commits mailing list