[llvm] [BOLT] Add split function support for the Linux kernel (PR #90541)
Maksim Panchenko via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 30 14:31:30 PDT 2024
https://github.com/maksfb updated https://github.com/llvm/llvm-project/pull/90541
>From b49b7a92ec00b2cd7e6c4dce3173700687d92dc2 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Mon, 29 Apr 2024 11:18:33 -0700
Subject: [PATCH] [BOLT] Add split function support for the Linux kernel
While rewriting the Linux kernel, we try to fit optimized functions into
their original boundaries. When a function becomes larger, we skip it
during the rewrite and end up with a less-than-optimal code layout. To
overcome this issue, add support for the --split-functions option so that
the hot part of a function can fit into its original space. The cold
part goes into the space reserved for BOLT in the binary.
---
bolt/include/bolt/Core/BinaryContext.h | 4 +++
bolt/lib/Passes/SplitFunctions.cpp | 13 +++++++
bolt/lib/Rewrite/LinuxKernelRewriter.cpp | 44 +++++++++++++++++++-----
bolt/lib/Rewrite/RewriteInstance.cpp | 38 +++++++++++---------
4 files changed, 73 insertions(+), 26 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 8b1af9e8153925..75765819ac464e 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -20,6 +20,7 @@
#include "bolt/Core/JumpTable.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/RuntimeLibs/RuntimeLibrary.h"
+#include "llvm/ADT/AddressRanges.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/iterator.h"
@@ -726,6 +727,9 @@ class BinaryContext {
uint64_t OldTextSectionOffset{0};
uint64_t OldTextSectionSize{0};
+ /// Area in the input binary reserved for BOLT.
+ AddressRange BOLTReserved;
+
/// Address of the code/function that is executed before any other code in
/// the binary.
std::optional<uint64_t> StartFunctionAddress;
diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp
index f9e634d15a9724..bd0b6dea0e065a 100644
--- a/bolt/lib/Passes/SplitFunctions.cpp
+++ b/bolt/lib/Passes/SplitFunctions.cpp
@@ -715,6 +715,12 @@ Error SplitFunctions::runOnFunctions(BinaryContext &BC) {
if (!opts::SplitFunctions)
return Error::success();
+ if (BC.IsLinuxKernel && BC.BOLTReserved.empty()) {
+ BC.errs() << "BOLT-ERROR: split functions require reserved space in the "
+ "Linux kernel binary\n";
+ exit(1);
+ }
+
// If split strategy is not CDSplit, then a second run of the pass is not
// needed after function reordering.
if (BC.HasFinalizedFunctionOrder &&
@@ -829,6 +835,13 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
}
}
}
+
+ // Outlining blocks with dynamic branches is not supported yet.
+ if (BC.IsLinuxKernel) {
+ if (llvm::any_of(
+ *BB, [&](MCInst &Inst) { return BC.MIB->isDynamicBranch(Inst); }))
+ BB->setCanOutline(false);
+ }
}
BF.getLayout().updateLayoutIndices();
diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index 3944bb742938a7..1fe984748c0231 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -783,11 +783,9 @@ Error LinuxKernelRewriter::rewriteORCTables() {
};
// Emit new ORC entries for the emitted function.
- auto emitORC = [&](const BinaryFunction &BF) -> Error {
- assert(!BF.isSplit() && "Split functions not supported by ORC writer yet.");
-
+ auto emitORC = [&](const FunctionFragment &FF) -> Error {
ORCState CurrentState = NullORC;
- for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
+ for (BinaryBasicBlock *BB : FF) {
for (MCInst &Inst : *BB) {
ErrorOr<ORCState> ErrorOrState =
BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
@@ -808,7 +806,36 @@ Error LinuxKernelRewriter::rewriteORCTables() {
return Error::success();
};
+ // Emit ORC entries for cold fragments. We assume that these fragments are
+ // emitted contiguously in memory using reserved space in the kernel. This
+ // assumption is validated in post-emit pass validateORCTables() where we
+ // check that ORC entries are sorted by their addresses.
+ auto emitColdORC = [&]() -> Error {
+ for (BinaryFunction &BF :
+ llvm::make_second_range(BC.getBinaryFunctions())) {
+ if (!BC.shouldEmit(BF))
+ continue;
+ for (FunctionFragment &FF : BF.getLayout().getSplitFragments())
+ if (Error E = emitORC(FF))
+ return E;
+ }
+
+ return Error::success();
+ };
+
+ bool ShouldEmitCold = !BC.BOLTReserved.empty();
for (ORCListEntry &Entry : ORCEntries) {
+ if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) {
+ if (Error E = emitColdORC())
+ return E;
+
+ // Emit terminator entry at the end of the reserved region.
+ if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC))
+ return E;
+
+ ShouldEmitCold = false;
+ }
+
// Emit original entries for functions that we haven't modified.
if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) {
// Emit terminator only if it marks the start of a function.
@@ -822,7 +849,7 @@ Error LinuxKernelRewriter::rewriteORCTables() {
// Emit all ORC entries for a function referenced by an entry and skip over
// the rest of entries for this function by resetting its ORC attribute.
if (Entry.BF->hasORC()) {
- if (Error E = emitORC(*Entry.BF))
+ if (Error E = emitORC(Entry.BF->getLayout().getMainFragment()))
return E;
Entry.BF->setHasORC(false);
}
@@ -831,10 +858,9 @@ Error LinuxKernelRewriter::rewriteORCTables() {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
<< " ORC entries\n");
- // Replicate terminator entry at the end of sections to match the original
- // table sizes.
- const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second;
- const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize();
+ // Populate ORC tables with a terminator entry with max address to match the
+ // original table sizes.
+ const uint64_t LastIP = std::numeric_limits<uint64_t>::max();
while (UnwindWriter.bytesRemaining()) {
if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
return E;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 23f79e3c135a78..62759b7222a778 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3628,13 +3628,19 @@ void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) {
}
if (StartBD) {
+ if (StartBD->getAddress() >= EndBD->getAddress()) {
+ BC->errs() << "BOLT-ERROR: invalid reserved space boundaries\n";
+ exit(1);
+ }
+ BC->BOLTReserved = AddressRange(StartBD->getAddress(), EndBD->getAddress());
+ BC->outs()
+ << "BOLT-INFO: using reserved space for allocating new sections\n";
+
PHDRTableOffset = 0;
PHDRTableAddress = 0;
NewTextSegmentAddress = 0;
NewTextSegmentOffset = 0;
- NextAvailableAddress = StartBD->getAddress();
- BC->outs()
- << "BOLT-INFO: using reserved space for allocating new sections\n";
+ NextAvailableAddress = BC->BOLTReserved.start();
}
// If no new .eh_frame was written, remove relocated original .eh_frame.
@@ -3657,12 +3663,12 @@ void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) {
// Map the rest of the sections.
mapAllocatableSections(MapSection);
- if (StartBD) {
- const uint64_t ReservedSpace = EndBD->getAddress() - StartBD->getAddress();
- const uint64_t AllocatedSize = NextAvailableAddress - StartBD->getAddress();
- if (ReservedSpace < AllocatedSize) {
- BC->errs() << "BOLT-ERROR: reserved space (" << ReservedSpace << " byte"
- << (ReservedSpace == 1 ? "" : "s")
+ if (!BC->BOLTReserved.empty()) {
+ const uint64_t AllocatedSize =
+ NextAvailableAddress - BC->BOLTReserved.start();
+ if (BC->BOLTReserved.size() < AllocatedSize) {
+ BC->errs() << "BOLT-ERROR: reserved space (" << BC->BOLTReserved.size()
+ << " byte" << (BC->BOLTReserved.size() == 1 ? "" : "s")
<< ") is smaller than required for new allocations ("
<< AllocatedSize << " bytes)\n";
exit(1);
@@ -5852,13 +5858,11 @@ void RewriteInstance::writeEHFrameHeader() {
NextAvailableAddress += EHFrameHdrSec.getOutputSize();
- if (const BinaryData *ReservedEnd =
- BC->getBinaryDataByName(getBOLTReservedEnd())) {
- if (NextAvailableAddress > ReservedEnd->getAddress()) {
- BC->errs() << "BOLT-ERROR: unable to fit " << getEHFrameHdrSectionName()
- << " into reserved space\n";
- exit(1);
- }
+ if (!BC->BOLTReserved.empty() &&
+ (NextAvailableAddress > BC->BOLTReserved.end())) {
+ BC->errs() << "BOLT-ERROR: unable to fit " << getEHFrameHdrSectionName()
+ << " into reserved space\n";
+ exit(1);
}
// Merge new .eh_frame with the relocated original so that gdb can locate all
@@ -5892,7 +5896,7 @@ uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) {
uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const {
// Check if it's possibly part of the new segment.
- if (Address >= NewTextSegmentAddress)
+ if (NewTextSegmentAddress && Address >= NewTextSegmentAddress)
return Address - NewTextSegmentAddress + NewTextSegmentOffset;
// Find an existing segment that matches the address.
More information about the llvm-commits
mailing list