[llvm] [BOLT] Add writing support for Linux kernel ORC (PR #80950)
Maksim Panchenko via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 6 23:54:08 PST 2024
maksfb (https://github.com/maksfb) created pull request https://github.com/llvm/llvm-project/pull/80950
Update ORC information based on the new code layout and emit corresponding ORC sections for the Linux kernel.
>From f4ac7eb84ae3a59b37c86c39a88b2f1beb67ff78 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Wed, 31 Jan 2024 22:39:17 -0800
Subject: [PATCH] [BOLT] Add writing support for Linux kernel ORC
Update ORC information based on the new code layout and emit
corresponding ORC sections for the Linux kernel.
---
bolt/lib/Rewrite/LinuxKernelRewriter.cpp | 212 ++++++++++++++++++++---
bolt/test/X86/linux-orc.s | 54 ++++--
2 files changed, 226 insertions(+), 40 deletions(-)
diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index c8674d6b837ad..d7edba881163b 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -14,9 +14,12 @@
#include "bolt/Rewrite/MetadataRewriter.h"
#include "bolt/Rewrite/MetadataRewriters.h"
#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
+#define DEBUG_TYPE "bolt-linux"
+
using namespace llvm;
using namespace bolt;
@@ -48,20 +51,25 @@ struct ORCState {
bool operator!=(const ORCState &Other) const { return !(*this == Other); }
};
+/// Section terminator ORC entry.
+static ORCState NullORC = {0, 0, 0};
+
/// Basic printer for ORC entry. It does not provide the same level of
/// information as objtool (for now).
inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) {
- if (opts::PrintORC)
+ if (!opts::PrintORC)
+ return OS;
+ if (E != NullORC)
OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset,
E.Info);
+ else
+ OS << "{terminator}";
+
return OS;
}
namespace {
-/// Section terminator ORC entry.
-static ORCState NullORC = {0, 0, 0};
-
class LinuxKernelRewriter final : public MetadataRewriter {
/// Linux Kernel special sections point to a specific instruction in many
/// cases. Unlike SDTMarkerInfo, these markers can come from different
@@ -102,6 +110,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
using ORCListType = std::vector<ORCListEntry>;
ORCListType ORCEntries;
+ /// Number of entries in the input file ORC sections.
+ uint64_t NumORCEntries = 0;
+
/// Insert an LKMarker for a given code pointer \p PC from a non-code section
/// \p SectionName.
void insertLKMarker(uint64_t PC, uint64_t SectionOffset,
@@ -207,8 +218,6 @@ void LinuxKernelRewriter::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
}
void LinuxKernelRewriter::processLKSections() {
- assert(BC.IsLinuxKernel && "Linux kernel binary expected.");
-
processLKExTable();
processLKPCIFixup();
processLKKSymtab();
@@ -464,10 +473,9 @@ Error LinuxKernelRewriter::readORCTables() {
return createStringError(errc::executable_format_error,
"missing ORC section");
- const uint64_t NumEntries =
- ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
- if (ORCUnwindSection->getSize() != NumEntries * ORC_UNWIND_ENTRY_SIZE ||
- ORCUnwindIPSection->getSize() != NumEntries * ORC_UNWIND_IP_ENTRY_SIZE)
+ NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
+ if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE ||
+ ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE)
return createStringError(errc::executable_format_error,
"ORC entries number mismatch detected");
@@ -481,7 +489,7 @@ Error LinuxKernelRewriter::readORCTables() {
DataExtractor::Cursor ORCCursor(0);
DataExtractor::Cursor IPCursor(0);
uint64_t PrevIP = 0;
- for (uint32_t Index = 0; Index < NumEntries; ++Index) {
+ for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
const uint64_t IP =
IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor);
@@ -505,25 +513,24 @@ Error LinuxKernelRewriter::readORCTables() {
Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(ORCCursor);
Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor);
Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor);
+ Entry.BF = nullptr;
// Consume the status of the cursor.
if (!ORCCursor)
return createStringError(errc::executable_format_error,
"out of bounds while reading ORC");
+ if (Entry.ORC == NullORC)
+ continue;
+
BinaryFunction *&BF = Entry.BF;
BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true);
// If the entry immediately pointing past the end of the function is not
// the terminator entry, then it does not belong to this function.
- if (BF && BF->getAddress() + BF->getSize() == IP && Entry.ORC != NullORC)
+ if (BF && BF->getAddress() + BF->getSize() == IP)
BF = 0;
- // If terminator entry points to the start of the function, then it belongs
- // to a different function that contains the previous IP.
- if (BF && BF->getAddress() == IP && Entry.ORC == NullORC)
- BF = BC.getBinaryFunctionContainingAddress(IP - 1);
-
if (!BF) {
if (opts::Verbosity)
errs() << "BOLT-WARNING: no binary function found matching ORC 0x"
@@ -531,9 +538,6 @@ Error LinuxKernelRewriter::readORCTables() {
continue;
}
- if (Entry.ORC == NullORC)
- continue;
-
BF->setHasORC(true);
if (!BF->hasInstructions())
@@ -556,12 +560,46 @@ Error LinuxKernelRewriter::readORCTables() {
BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC);
}
- // Older kernels could contain unsorted tables in the file as the tables were
- // sorted during boot time.
+ if (opts::DumpORC) {
+ outs() << "BOLT-INFO: ORC unwind information:\n";
+ for (const ORCListEntry &E : ORCEntries) {
+ outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
+ if (E.BF)
+ outs() << ": " << *E.BF;
+ outs() << '\n';
+ }
+ }
+
+ // Add entries for functions that don't have explicit ORC info at the start.
+ // We'll have the correct info for them even if ORC for the preceding function
+ // changes.
+ ORCListType NewEntries;
+ for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
+ auto It = llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
+ return E.IP <= BF.getAddress();
+ });
+ if (It != ORCEntries.begin())
+ --It;
+
+ if (It->BF == &BF)
+ continue;
+
+ if (It->ORC == NullORC && It->IP == BF.getAddress()) {
+ assert(!It->BF);
+ It->BF = &BF;
+ continue;
+ }
+
+ NewEntries.push_back({BF.getAddress(), &BF, It->ORC});
+ if (It->ORC != NullORC)
+ BF.setHasORC(true);
+ }
+
+ llvm::copy(NewEntries, std::back_inserter(ORCEntries));
llvm::sort(ORCEntries);
if (opts::DumpORC) {
- outs() << "BOLT-INFO: ORC unwind information:\n";
+ outs() << "BOLT-INFO: amended ORC unwind information:\n";
for (const ORCListEntry &E : ORCEntries) {
outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
if (E.BF)
@@ -593,20 +631,26 @@ Error LinuxKernelRewriter::processORCPostCFG() {
continue;
}
- // In case there was no ORC entry that matched the function start
- // address, we need to propagate ORC state from the previous entry.
+ // Get state for the start of the function.
if (!CurrentState) {
auto It =
llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
- return E.IP < BF.getAddress();
+ return E.IP <= BF.getAddress();
});
if (It != ORCEntries.begin())
- It = std::prev(It);
+ --It;
+
+ if (It->IP != BF.getAddress() || It->BF != &BF)
+ dbgs() << "0x" << Twine::utohexstr(It->IP) << " : " << BF << '\n';
+ assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) &&
+ "Function entry expected.");
if (It->ORC == NullORC && BF.hasORC())
errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
<< BF << '\n';
+ It->BF = &BF;
+
CurrentState = It->ORC;
if (It->ORC != NullORC)
BF.setHasORC(true);
@@ -623,9 +667,121 @@ Error LinuxKernelRewriter::processORCPostCFG() {
}
Error LinuxKernelRewriter::rewriteORCTables() {
- // TODO:
+ if (!ORCUnwindSection || !ORCUnwindIPSection)
+ return Error::success();
+
+ // Update ORC sections in-place. As we change the code, the number of ORC
+ // entries may increase for some functions. However, as we remove terminator
+ // redundancy (see below), more space is freed up and we should always be able
+ // to fit new ORC tables in the reserved space.
+ auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter {
+ const size_t Size = Section.getSize();
+ uint8_t *NewContents = new uint8_t[Size];
+ Section.updateContents(NewContents, Size);
+ Section.setOutputFileOffset(Section.getInputFileOffset());
+ return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian()
+ ? endianness::little
+ : endianness::big);
+ };
+ BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection);
+ BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection);
+
+ uint64_t NumEmitted = 0;
+ std::optional<ORCState> LastEmittedORC;
+ auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC,
+ MCSymbol *Label = 0, bool Force = false) -> Error {
+ if (LastEmittedORC && ORC == *LastEmittedORC && !Force)
+ return Error::success();
+
+ LastEmittedORC = ORC;
+
+ if (++NumEmitted > NumORCEntries)
+ return createStringError(errc::executable_format_error,
+ "exceeded the number of allocated ORC entries");
+
+ if (Label)
+ ORCUnwindIPSection->addRelocation(UnwindIPWriter.getOffset(), Label,
+ Relocation::getPC32(), /*Addend*/ 0);
+
+ const int32_t IPValue =
+ IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset();
+ if (Error E = UnwindIPWriter.writeInteger(IPValue))
+ return E;
+
+ if (Error E = UnwindWriter.writeInteger(ORC.SPOffset))
+ return E;
+ if (Error E = UnwindWriter.writeInteger(ORC.BPOffset))
+ return E;
+ if (Error E = UnwindWriter.writeInteger(ORC.Info))
+ return E;
+
+ return Error::success();
+ };
+
+ // Emit new ORC entries for an emitted function.
+ auto emitORC = [&](const BinaryFunction &BF) -> Error {
+ assert(!BF.isSplit() && "Split functions not supported by ORC writer yet.");
+
+ ORCState CurrentState = NullORC;
+ for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
+ for (MCInst &Inst : *BB) {
+ ErrorOr<ORCState> ErrorOrState =
+ BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
+ if (!ErrorOrState || *ErrorOrState == CurrentState)
+ continue;
+
+ // Issue label for the instruction.
+ MCSymbol *Label = BC.MIB->getLabel(Inst);
+ if (!Label) {
+ Label = BC.Ctx->createTempSymbol("__ORC_");
+ BC.MIB->setLabel(Inst, Label);
+ }
+
+ if (Error E = emitORCEntry(0, *ErrorOrState, Label))
+ return E;
+
+ CurrentState = *ErrorOrState;
+ }
+ }
+
+ return Error::success();
+ };
+
+ for (ORCListEntry &Entry : ORCEntries) {
+ // Emit original entries for functions that we haven't modified.
+ if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) {
+ // Emit terminator only if it marks the start of a function.
+ if (Entry.ORC == NullORC && !Entry.BF)
+ continue;
+ if (Error E = emitORCEntry(Entry.IP, Entry.ORC))
+ return E;
+ continue;
+ }
+
+ // Emit all ORC entries for a function referenced by an entry and skip over
+ // the rest of entries for this function by resetting its ORC attribute.
+ if (Entry.BF->hasORC()) {
+ if (Error E = emitORC(*Entry.BF))
+ return E;
+ Entry.BF->setHasORC(false);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
+ << " ORC entries\n");
+
+ // Replicate terminator entry at the end of sections to match the original
+ // table sizes.
+ const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second;
+ const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize();
+ while (UnwindWriter.bytesRemaining()) {
+ if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
+ return E;
+ }
+
return Error::success();
}
+
} // namespace
std::unique_ptr<MetadataRewriter>
diff --git a/bolt/test/X86/linux-orc.s b/bolt/test/X86/linux-orc.s
index bd652eafa23dd..729eea4bc589a 100644
--- a/bolt/test/X86/linux-orc.s
+++ b/bolt/test/X86/linux-orc.s
@@ -1,28 +1,52 @@
# REQUIRES: system-linux
-## Check that BOLT correctly reads ORC unwind information used by Linux kernel.
+## Check that BOLT correctly updates ORC unwind information used by the Linux
+## kernel.
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
-# RUN: %clang %cflags -nostdlib %t.o -o %t.exe -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr
+# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
+# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr
+
+## Verify reading contents of ORC sections.
+
+# RUN: llvm-bolt %t.exe --dump-orc -o /dev/null |& FileCheck %s \
+# RUN: --check-prefix=CHECK-ORC
+
+# CHECK-ORC: BOLT-INFO: ORC unwind information:
+# CHECK-ORC-NEXT: {sp: 8, bp: 0, info: 0x5}: _start
+# CHECK-ORC-NEXT: {terminator}
+# CHECK-ORC-NEXT: {sp: 8, bp: 0, info: 0x5}: foo
+# CHECK-ORC-NEXT: {sp: 16, bp: -16, info: 0x15}: foo
+# CHECK-ORC-NEXT: {sp: 16, bp: -16, info: 0x14}: foo
+# CHECK-ORC-NEXT: {sp: 8, bp: 0, info: 0x5}: foo
+# CHECK-ORC-NEXT: {terminator}
+# CHECK-ORC-NEXT: {terminator}
+# CHECK-ORC-NEXT: {terminator}
+
+
+## Verify ORC bindings to instructions.
# RUN: llvm-bolt %t.exe --print-normalized --dump-orc --print-orc -o %t.out \
+# RUN: --bolt-info=0 |& FileCheck %s
+
+
+## Verify ORC bindings after rewrite.
+
+# RUN: llvm-bolt %t.out -o %t.out.1 --print-normalized --print-orc \
# RUN: |& FileCheck %s
-# CHECK: BOLT-INFO: Linux kernel binary detected
-# CHECK: BOLT-INFO: ORC unwind information:
-# CHECK-NEXT: {sp: 8, bp: 0, info: 0x5}: _start
-# CHECK-NEXT: {sp: 0, bp: 0, info: 0x0}: _start
-# CHECK-NEXT: {sp: 8, bp: 0, info: 0x5}: foo
-# CHECK-NEXT: {sp: 16, bp: -16, info: 0x15}: foo
-# CHECK-NEXT: {sp: 16, bp: -16, info: 0x14}: foo
-# CHECK-NEXT: {sp: 8, bp: 0, info: 0x5}: foo
-# CHECK-NEXT: {sp: 0, bp: 0, info: 0x0}: bar
-# CHECK-NEXT: {sp: 0, bp: 0, info: 0x0}: bar
+## Verify ORC binding after rewrite when some of the functions are skipped.
+
+# RUN: llvm-bolt %t.exe -o %t.out --skip-funcs=bar --bolt-info=0
+# RUN: llvm-bolt %t.out -o %t.out.1 --print-normalized --print-orc \
+# RUN: |& FileCheck %s
+# CHECK: BOLT-INFO: Linux kernel binary detected
.text
.globl _start
.type _start, %function
_start:
+# CHECK: Binary Function "_start"
call foo
# CHECK: callq foo # ORC: {sp: 8, bp: 0, info: 0x5}
@@ -32,6 +56,8 @@ _start:
.globl foo
.type foo, %function
foo:
+# CHECK: Binary Function "foo"
+
push %rbp
# CHECK: pushq %rbp # ORC: {sp: 8, bp: 0, info: 0x5}
.L1:
@@ -40,12 +66,16 @@ foo:
.L2:
pop %rbp
# CHECK: popq %rbp # ORC: {sp: 16, bp: -16, info: 0x14}
+ nop
.L3:
ret
# CHECK: retq # ORC: {sp: 8, bp: 0, info: 0x5}
.size foo, .-foo
+ .globl bar
+ .type bar, %function
bar:
+# CHECK: Binary Function "bar"
ret
## Same ORC info propagated from foo above.
# CHECK: retq # ORC: {sp: 8, bp: 0, info: 0x5}
More information about the llvm-commits mailing list