[llvm] [BOLT] Add support for BOLT-reserved space in a binary (PR #90300)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 29 11:45:19 PDT 2024


https://github.com/maksfb updated https://github.com/llvm/llvm-project/pull/90300

>From 8c106c50cff77a71b39d19e4d91037edb61efd80 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Fri, 26 Apr 2024 16:39:00 -0700
Subject: [PATCH 1/2] [BOLT] Add support for BOLT-reserved space in a binary

Allow the user to allocate space in a binary that could be used by BOLT
for allocating new sections. The reservation is delimited by two special
symbols recognized by BOLT: __bolt_reserved_{start,end}.

The reserved space will be useful for optimizing the Linux kernel where
we cannot allocate a new executable segment. However, the support is not
limited to kernel binaries, as some user-space applications may find it
useful too.
---
 bolt/include/bolt/Rewrite/RewriteInstance.h |  4 +
 bolt/lib/Rewrite/RewriteInstance.cpp        | 86 +++++++++++++++------
 2 files changed, 68 insertions(+), 22 deletions(-)

diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index f4bffba96b1d4e..21d66a709364c4 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -422,6 +422,10 @@ class RewriteInstance {
   /// Section name used for extra BOLT code in addition to .text.
   static StringRef getBOLTTextSectionName() { return ".bolt.text"; }
 
+  /// Symbol markers for BOLT reserved area.
+  static StringRef getBOLTReservedStart() { return "__bolt_reserved_start"; }
+  static StringRef getBOLTReservedEnd() { return "__bolt_reserved_end"; }
+
   /// Common section names.
   static StringRef getEHFrameSectionName() { return ".eh_frame"; }
   static StringRef getEHFrameHdrSectionName() { return ".eh_frame_hdr"; }
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 065260936e70a5..72ebf57a0e737a 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1062,6 +1062,11 @@ void RewriteInstance::discoverFileObjects() {
       continue;
     }
 
+    if (SymName == getBOLTReservedStart() || SymName == getBOLTReservedEnd()) {
+      registerName(SymbolSize);
+      continue;
+    }
+
     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName
                       << " for function\n");
 
@@ -3526,6 +3531,24 @@ void RewriteInstance::updateMetadata() {
 void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) {
   BC->deregisterUnusedSections();
 
+  // Check if the input has a space reserved for BOLT.
+  BinaryData *StartBD = BC->getBinaryDataByName(getBOLTReservedStart());
+  BinaryData *EndBD = BC->getBinaryDataByName(getBOLTReservedEnd());
+  if (StartBD) {
+    if (!EndBD) {
+      BC->errs() << "BOLT-ERROR: " << getBOLTReservedEnd() << " is missing\n";
+      exit(1);
+    }
+
+    PHDRTableOffset = 0;
+    PHDRTableAddress = 0;
+    NextAvailableAddress = StartBD->getAddress();
+    NewTextSegmentAddress = 0;
+    NewTextSegmentOffset = 0;
+    BC->outs()
+        << "BOLT-INFO: using reserved space for allocating new sections\n";
+  }
+
   // If no new .eh_frame was written, remove relocated original .eh_frame.
   BinarySection *RelocatedEHFrameSection =
       getSection(".relocated" + getEHFrameSectionName());
@@ -3545,6 +3568,18 @@ void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) {
 
   // Map the rest of the sections.
   mapAllocatableSections(MapSection);
+
+  if (StartBD) {
+    const uint64_t ReservedSpace = EndBD->getAddress() - StartBD->getAddress();
+    const uint64_t AllocatedSize = NextAvailableAddress - StartBD->getAddress();
+    if (ReservedSpace < AllocatedSize) {
+      BC->errs() << "BOLT-ERROR: reserved space (" << ReservedSpace << " byte"
+                 << (ReservedSpace == 1 ? "" : "s")
+                 << ") is smaller than required for new allocations ("
+                 << AllocatedSize << " bytes)\n";
+      exit(1);
+    }
+  }
 }
 
 std::vector<BinarySection *> RewriteInstance::getCodeSections() {
@@ -3786,7 +3821,7 @@ void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) {
   // Add the new text section aggregating all existing code sections.
   // This is pseudo-section that serves a purpose of creating a corresponding
   // entry in section header table.
-  int64_t NewTextSectionSize =
+  const uint64_t NewTextSectionSize =
       NextAvailableAddress - NewTextSectionStartAddress;
   if (NewTextSectionSize) {
     const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
@@ -3869,7 +3904,7 @@ void RewriteInstance::mapAllocatableSections(
       if (PHDRTableAddress) {
         // Segment size includes the size of the PHDR area.
         NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress;
-      } else {
+      } else if (NewTextSegmentAddress) {
         // Existing PHDR table would be updated.
         NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
       }
@@ -3908,7 +3943,7 @@ void RewriteInstance::patchELFPHDRTable() {
     assert(!PHDRTableAddress && "unexpected address for program header table");
     PHDRTableOffset = Obj.getHeader().e_phoff;
     if (NewWritableSegmentSize) {
-      BC->errs() << "Unable to add writable segment with UseGnuStack option\n";
+      BC->errs() << "BOLT-ERROR: unable to add writable segment\n";
       exit(1);
     }
   }
@@ -3918,7 +3953,7 @@ void RewriteInstance::patchELFPHDRTable() {
   if (!NewWritableSegmentSize) {
     if (PHDRTableAddress)
       NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress;
-    else
+    else if (NewTextSegmentAddress)
       NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
   } else {
     NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress;
@@ -3952,8 +3987,10 @@ void RewriteInstance::patchELFPHDRTable() {
   };
 
   auto writeNewSegmentPhdrs = [&]() {
-    ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
-    OS.write(reinterpret_cast<const char *>(&NewTextPhdr), sizeof(NewTextPhdr));
+    if (PHDRTableAddress || NewTextSegmentSize) {
+      ELF64LE::Phdr NewPhdr = createNewTextPhdr();
+      OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr));
+    }
 
     if (NewWritableSegmentSize) {
       ELF64LEPhdrTy NewPhdr;
@@ -4051,9 +4088,8 @@ void RewriteInstance::rewriteNoteSections() {
   const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
   raw_fd_ostream &OS = Out->os();
 
-  uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
-  assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
-         "next available offset calculation failure");
+  uint64_t NextAvailableOffset = std::max(
+      getFileOffsetForAddress(NextAvailableAddress), FirstNonAllocatableOffset);
   OS.seek(NextAvailableOffset);
 
   // Copy over non-allocatable section contents and update file offsets.
@@ -4792,7 +4828,7 @@ void RewriteInstance::updateELFSymbolTable(
       ++NumHotDataSymsUpdated;
     }
 
-    if (*SymbolName == "_end")
+    if (*SymbolName == "_end" && NextAvailableAddress > Symbol.st_value)
       updateSymbolValue(*SymbolName, NextAvailableAddress);
 
     if (IsDynSym)
@@ -4906,13 +4942,6 @@ void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) {
   std::vector<uint32_t> NewSectionIndex;
   getOutputSections(File, NewSectionIndex);
 
-  // Set pointer at the end of the output file, so we can pwrite old symbol
-  // tables if we need to.
-  uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
-  assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
-         "next available offset calculation failure");
-  Out->os().seek(NextAvailableOffset);
-
   // Update dynamic symbol table.
   const ELFShdrTy *DynSymSection = nullptr;
   for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
@@ -4924,6 +4953,10 @@ void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) {
   assert((DynSymSection || BC->IsStaticExecutable) &&
          "dynamic symbol table expected");
   if (DynSymSection) {
+    // Set pointer to the end of the section, so we can use pwrite to update
+    // the dynamic symbol table.
+    Out->os().seek(DynSymSection->sh_offset + DynSymSection->sh_size);
+
     updateELFSymbolTable(
         File,
         /*IsDynSym=*/true,
@@ -5477,10 +5510,10 @@ void RewriteInstance::rewriteFile() {
   auto Streamer = BC->createStreamer(OS);
   // Make sure output stream has enough reserved space, otherwise
   // pwrite() will fail.
-  uint64_t Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress));
-  (void)Offset;
-  assert(Offset == getFileOffsetForAddress(NextAvailableAddress) &&
-         "error resizing output file");
+  uint64_t Offset = std::max(getFileOffsetForAddress(NextAvailableAddress),
+                             FirstNonAllocatableOffset);
+  Offset = OS.seek(Offset);
+  assert((Offset != (uint64_t)-1) && "Error resizing output file");
 
   // Overwrite functions with fixed output address. This is mostly used by
   // non-relocation mode, with one exception: injected functions are covered
@@ -5712,7 +5745,7 @@ void RewriteInstance::writeEHFrameHeader() {
   std::vector<char> NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader(
       RelocatedEHFrame, NewEHFrame, EHFrameHdrOutputAddress, FailedAddresses);
 
-  assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch");
+  Out->os().seek(EHFrameHdrFileOffset);
   Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size());
 
   const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
@@ -5732,6 +5765,15 @@ void RewriteInstance::writeEHFrameHeader() {
 
   NextAvailableAddress += EHFrameHdrSec.getOutputSize();
 
+  if (const BinaryData *ReservedEnd =
+          BC->getBinaryDataByName(getBOLTReservedEnd())) {
+    if (NextAvailableAddress > ReservedEnd->getAddress()) {
+      BC->errs() << "BOLT-ERROR: unable to fit " << getEHFrameHdrSectionName()
+                 << " into reserved space\n";
+      exit(1);
+    }
+  }
+
   // Merge new .eh_frame with the relocated original so that gdb can locate all
   // FDEs.
   if (RelocatedEHFrameSection) {

>From cac62dae147c315e099653cdfb5ad9a86894d0db Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Mon, 29 Apr 2024 11:44:51 -0700
Subject: [PATCH 2/2] fixup! [BOLT] Add support for BOLT-reserved space in a
 binary

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 72ebf57a0e737a..166cc1185167b1 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3534,17 +3534,19 @@ void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) {
   // Check if the input has a space reserved for BOLT.
   BinaryData *StartBD = BC->getBinaryDataByName(getBOLTReservedStart());
   BinaryData *EndBD = BC->getBinaryDataByName(getBOLTReservedEnd());
-  if (StartBD) {
-    if (!EndBD) {
-      BC->errs() << "BOLT-ERROR: " << getBOLTReservedEnd() << " is missing\n";
-      exit(1);
-    }
+  if (!StartBD != !EndBD) {
+    BC->errs() << "BOLT-ERROR: one of the symbols is missing from the binary: "
+               << getBOLTReservedStart() << ", " << getBOLTReservedEnd()
+               << '\n';
+    exit(1);
+  }
 
+  if (StartBD) {
     PHDRTableOffset = 0;
     PHDRTableAddress = 0;
-    NextAvailableAddress = StartBD->getAddress();
     NewTextSegmentAddress = 0;
     NewTextSegmentOffset = 0;
+    NextAvailableAddress = StartBD->getAddress();
     BC->outs()
         << "BOLT-INFO: using reserved space for allocating new sections\n";
   }



More information about the llvm-commits mailing list