[llvm] [BOLT] Add split function support for the Linux kernel (PR #90541)

via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 29 18:48:46 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Maksim Panchenko (maksfb)

<details>
<summary>Changes</summary>

While rewriting the Linux kernel, we try to fit optimized functions into their original boundaries. When a function becomes larger, we skip it during the rewrite and end up with less than optimal code layout. To overcome that issue, add support for --split-function option so that hot part of the function could be fit into the original space. The cold part should go to reserved space in the binary.

---
Full diff: https://github.com/llvm/llvm-project/pull/90541.diff


4 Files Affected:

- (modified) bolt/include/bolt/Core/BinaryContext.h (+4) 
- (modified) bolt/lib/Passes/SplitFunctions.cpp (+13) 
- (modified) bolt/lib/Rewrite/LinuxKernelRewriter.cpp (+66-9) 
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+21-17) 


``````````diff
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 8b1af9e8153925..75765819ac464e 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -20,6 +20,7 @@
 #include "bolt/Core/JumpTable.h"
 #include "bolt/Core/MCPlusBuilder.h"
 #include "bolt/RuntimeLibs/RuntimeLibrary.h"
+#include "llvm/ADT/AddressRanges.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/iterator.h"
@@ -726,6 +727,9 @@ class BinaryContext {
   uint64_t OldTextSectionOffset{0};
   uint64_t OldTextSectionSize{0};
 
+  /// Area in the input binary reserved for BOLT.
+  AddressRange BOLTReserved;
+
   /// Address of the code/function that is executed before any other code in
   /// the binary.
   std::optional<uint64_t> StartFunctionAddress;
diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp
index f9e634d15a9724..bd0b6dea0e065a 100644
--- a/bolt/lib/Passes/SplitFunctions.cpp
+++ b/bolt/lib/Passes/SplitFunctions.cpp
@@ -715,6 +715,12 @@ Error SplitFunctions::runOnFunctions(BinaryContext &BC) {
   if (!opts::SplitFunctions)
     return Error::success();
 
+  if (BC.IsLinuxKernel && BC.BOLTReserved.empty()) {
+    BC.errs() << "BOLT-ERROR: split functions require reserved space in the "
+                 "Linux kernel binary\n";
+    exit(1);
+  }
+
   // If split strategy is not CDSplit, then a second run of the pass is not
   // needed after function reordering.
   if (BC.HasFinalizedFunctionOrder &&
@@ -829,6 +835,13 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
         }
       }
     }
+
+    // Outlining blocks with dynamic branches is not supported yet.
+    if (BC.IsLinuxKernel) {
+      if (llvm::any_of(
+              *BB, [&](MCInst &Inst) { return BC.MIB->isDynamicBranch(Inst); }))
+        BB->setCanOutline(false);
+    }
   }
 
   BF.getLayout().updateLayoutIndices();
diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index d96199e020d31a..1fe984748c0231 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -248,6 +248,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
   /// Update ORC data in the binary.
   Error rewriteORCTables();
 
+  /// Validate written ORC tables after binary emission.
+  Error validateORCTables();
+
   /// Static call table handling.
   Error readStaticCalls();
   Error rewriteStaticCalls();
@@ -358,6 +361,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
     if (Error E = updateStaticKeysJumpTablePostEmit())
       return E;
 
+    if (Error E = validateORCTables())
+      return E;
+
     return Error::success();
   }
 };
@@ -777,11 +783,9 @@ Error LinuxKernelRewriter::rewriteORCTables() {
   };
 
   // Emit new ORC entries for the emitted function.
-  auto emitORC = [&](const BinaryFunction &BF) -> Error {
-    assert(!BF.isSplit() && "Split functions not supported by ORC writer yet.");
-
+  auto emitORC = [&](const FunctionFragment &FF) -> Error {
     ORCState CurrentState = NullORC;
-    for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
+    for (BinaryBasicBlock *BB : FF) {
       for (MCInst &Inst : *BB) {
         ErrorOr<ORCState> ErrorOrState =
             BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
@@ -802,7 +806,36 @@ Error LinuxKernelRewriter::rewriteORCTables() {
     return Error::success();
   };
 
+  // Emit ORC entries for cold fragments. We assume that these fragments are
+  // emitted contiguously in memory using reserved space in the kernel. This
+  // assumption is validated in post-emit pass validateORCTables() where we
+  // check that ORC entries are sorted by their addresses.
+  auto emitColdORC = [&]() -> Error {
+    for (BinaryFunction &BF :
+         llvm::make_second_range(BC.getBinaryFunctions())) {
+      if (!BC.shouldEmit(BF))
+        continue;
+      for (FunctionFragment &FF : BF.getLayout().getSplitFragments())
+        if (Error E = emitORC(FF))
+          return E;
+    }
+
+    return Error::success();
+  };
+
+  bool ShouldEmitCold = !BC.BOLTReserved.empty();
   for (ORCListEntry &Entry : ORCEntries) {
+    if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) {
+      if (Error E = emitColdORC())
+        return E;
+
+      // Emit terminator entry at the end of the reserved region.
+      if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC))
+        return E;
+
+      ShouldEmitCold = false;
+    }
+
     // Emit original entries for functions that we haven't modified.
     if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) {
       // Emit terminator only if it marks the start of a function.
@@ -816,7 +849,7 @@ Error LinuxKernelRewriter::rewriteORCTables() {
     // Emit all ORC entries for a function referenced by an entry and skip over
     // the rest of entries for this function by resetting its ORC attribute.
     if (Entry.BF->hasORC()) {
-      if (Error E = emitORC(*Entry.BF))
+      if (Error E = emitORC(Entry.BF->getLayout().getMainFragment()))
         return E;
       Entry.BF->setHasORC(false);
     }
@@ -825,10 +858,9 @@ Error LinuxKernelRewriter::rewriteORCTables() {
   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
                     << " ORC entries\n");
 
-  // Replicate terminator entry at the end of sections to match the original
-  // table sizes.
-  const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second;
-  const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize();
+  // Populate ORC tables with a terminator entry with max address to match the
+  // original table sizes.
+  const uint64_t LastIP = std::numeric_limits<uint64_t>::max();
   while (UnwindWriter.bytesRemaining()) {
     if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
       return E;
@@ -837,6 +869,31 @@ Error LinuxKernelRewriter::rewriteORCTables() {
   return Error::success();
 }
 
+Error LinuxKernelRewriter::validateORCTables() {
+  if (!ORCUnwindIPSection)
+    return Error::success();
+
+  const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress();
+  DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getOutputContents(),
+                                     BC.AsmInfo->isLittleEndian(),
+                                     BC.AsmInfo->getCodePointerSize());
+  DataExtractor::Cursor IPCursor(0);
+  uint64_t PrevIP = 0;
+  for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
+    const uint64_t IP =
+        IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor);
+    if (!IPCursor)
+      return createStringError(errc::executable_format_error,
+                               "out of bounds while reading ORC IP table: %s",
+                               toString(IPCursor.takeError()).c_str());
+
+    assert(IP >= PrevIP && "Unsorted ORC table detected");
+    PrevIP = IP;
+  }
+
+  return Error::success();
+}
+
 /// The static call site table is created by objtool and contains entries in the
 /// following format:
 ///
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 644d87eeca42e6..9b555f2e1a80cf 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3628,13 +3628,19 @@ void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) {
   }
 
   if (StartBD) {
+    if (StartBD->getAddress() >= EndBD->getAddress()) {
+      BC->errs() << "BOLT-ERROR: invalid reserved space boundaries\n";
+      exit(1);
+    }
+    BC->BOLTReserved = AddressRange(StartBD->getAddress(), EndBD->getAddress());
+    BC->outs()
+        << "BOLT-INFO: using reserved space for allocating new sections\n";
+
     PHDRTableOffset = 0;
     PHDRTableAddress = 0;
     NewTextSegmentAddress = 0;
     NewTextSegmentOffset = 0;
-    NextAvailableAddress = StartBD->getAddress();
-    BC->outs()
-        << "BOLT-INFO: using reserved space for allocating new sections\n";
+    NextAvailableAddress = BC->BOLTReserved.start();
   }
 
   // If no new .eh_frame was written, remove relocated original .eh_frame.
@@ -3657,12 +3663,12 @@ void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) {
   // Map the rest of the sections.
   mapAllocatableSections(MapSection);
 
-  if (StartBD) {
-    const uint64_t ReservedSpace = EndBD->getAddress() - StartBD->getAddress();
-    const uint64_t AllocatedSize = NextAvailableAddress - StartBD->getAddress();
-    if (ReservedSpace < AllocatedSize) {
-      BC->errs() << "BOLT-ERROR: reserved space (" << ReservedSpace << " byte"
-                 << (ReservedSpace == 1 ? "" : "s")
+  if (!BC->BOLTReserved.empty()) {
+    const uint64_t AllocatedSize =
+        NextAvailableAddress - BC->BOLTReserved.start();
+    if (BC->BOLTReserved.size() < AllocatedSize) {
+      BC->errs() << "BOLT-ERROR: reserved space (" << BC->BOLTReserved.size()
+                 << " byte" << (BC->BOLTReserved.size() == 1 ? "" : "s")
                  << ") is smaller than required for new allocations ("
                  << AllocatedSize << " bytes)\n";
       exit(1);
@@ -5853,13 +5859,11 @@ void RewriteInstance::writeEHFrameHeader() {
 
   NextAvailableAddress += EHFrameHdrSec.getOutputSize();
 
-  if (const BinaryData *ReservedEnd =
-          BC->getBinaryDataByName(getBOLTReservedEnd())) {
-    if (NextAvailableAddress > ReservedEnd->getAddress()) {
-      BC->errs() << "BOLT-ERROR: unable to fit " << getEHFrameHdrSectionName()
-                 << " into reserved space\n";
-      exit(1);
-    }
+  if (!BC->BOLTReserved.empty() &&
+      (NextAvailableAddress > BC->BOLTReserved.end())) {
+    BC->errs() << "BOLT-ERROR: unable to fit " << getEHFrameHdrSectionName()
+               << " into reserved space\n";
+    exit(1);
   }
 
   // Merge new .eh_frame with the relocated original so that gdb can locate all
@@ -5893,7 +5897,7 @@ uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) {
 
 uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const {
   // Check if it's possibly part of the new segment.
-  if (Address >= NewTextSegmentAddress)
+  if (NewTextSegmentAddress && Address >= NewTextSegmentAddress)
     return Address - NewTextSegmentAddress + NewTextSegmentOffset;
 
   // Find an existing segment that matches the address.

``````````

</details>


https://github.com/llvm/llvm-project/pull/90541


More information about the llvm-commits mailing list