[llvm] [BOLT] Add reading support for Linux kernel .parainstructions section (PR #83965)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 4 22:39:34 PST 2024


https://github.com/maksfb created https://github.com/llvm/llvm-project/pull/83965

Read the .parainstructions section and mark call instructions with ParaSite annotations.

>From 63bcc3ee3637d821781305514c91c358895d3eb6 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Thu, 23 Nov 2023 14:04:19 -0800
Subject: [PATCH] [BOLT] Add reading support for Linux kernel .parainstructions
 section

Read the .parainstructions section and mark call instructions with
ParaSite annotations.
---
 bolt/lib/Rewrite/LinuxKernelRewriter.cpp | 84 ++++++++++++++++++++++++
 bolt/test/X86/linux-parainstructions.s   | 54 +++++++++++++++
 2 files changed, 138 insertions(+)
 create mode 100644 bolt/test/X86/linux-parainstructions.s

diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index 145acd32356cd1..a78397199972c0 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -36,6 +36,10 @@ static cl::opt<bool>
     DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
             cl::init(false), cl::Hidden, cl::cat(BoltCategory));
 
+static cl::opt<bool> DumpParavirtualPatchSites(
+    "dump-para-sites", cl::desc("dump Linux kernel paravirtual patch sites"),
+    cl::init(false), cl::Hidden, cl::cat(BoltCategory));
+
 static cl::opt<bool> DumpStaticCalls("dump-static-calls",
                                      cl::desc("dump Linux kernel static calls"),
                                      cl::init(false), cl::Hidden,
@@ -147,6 +151,12 @@ class LinuxKernelRewriter final : public MetadataRewriter {
   /// Functions with exception handling code.
   DenseSet<BinaryFunction *> FunctionsWithExceptions;
 
+  /// Section with paravirtual patch sites.
+  ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address;
+
+  /// Alignment of paravirtual patch structures.
+  static constexpr size_t PARA_PATCH_ALIGN = 8;
+
   /// Insert an LKMarker for a given code pointer \p PC from a non-code section
   /// \p SectionName.
   void insertLKMarker(uint64_t PC, uint64_t SectionOffset,
@@ -187,6 +197,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
   Error readExceptionTable();
   Error rewriteExceptionTable();
 
+  /// Paravirtual instruction patch sites.
+  Error readParaInstructions();
+
   /// Mark instructions referenced by kernel metadata.
   Error markInstructions();
 
@@ -208,6 +221,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
     if (Error E = readExceptionTable())
       return E;
 
+    if (Error E = readParaInstructions())
+      return E;
+
     return Error::success();
   }
 
@@ -1013,6 +1029,74 @@ Error LinuxKernelRewriter::rewriteExceptionTable() {
   return Error::success();
 }
 
+/// The .parainstructions section describes paravirtual call instructions that
+/// the kernel may patch at runtime. Each entry in the section has the form:
+///
+///    struct paravirt_patch_site {
+///      u8 *instr;    /* original instructions */
+///      u8 type;      /* type of this instruction */
+///      u8 len;       /* length of original instruction */
+///    };
+///
+/// Entries are 8-byte aligned. In functions passing BC.shouldEmit(), the
+/// instruction at `instr` is annotated with "ParaSite" = 1-based entry number.
+Error LinuxKernelRewriter::readParaInstructions() {
+  ParavirtualPatchSection = BC.getUniqueSectionByName(".parainstructions");
+  if (!ParavirtualPatchSection)
+    return Error::success();
+
+  DataExtractor DE = DataExtractor(ParavirtualPatchSection->getContents(),
+                                   BC.AsmInfo->isLittleEndian(),
+                                   BC.AsmInfo->getCodePointerSize());
+  uint32_t EntryID = 0;
+  DataExtractor::Cursor Cursor(0);
+  while (Cursor && !DE.eof(Cursor)) {
+    const uint64_t NextOffset = alignTo(Cursor.tell(), Align(PARA_PATCH_ALIGN));
+    if (!DE.isValidOffset(NextOffset))
+      break;
+    Cursor.seek(NextOffset);
+
+    const uint64_t InstrLocation = DE.getU64(Cursor);
+    const uint8_t Type = DE.getU8(Cursor);
+    const uint8_t Len = DE.getU8(Cursor);
+    // Consume the Cursor's Error so it is not destroyed unhandled.
+    if (!Cursor)
+      return createStringError(
+          errc::executable_format_error,
+          "out of bounds while reading .parainstructions: %s",
+          toString(Cursor.takeError()).c_str());
+
+    ++EntryID;
+    if (opts::DumpParavirtualPatchSites) {
+      BC.outs() << "Paravirtual patch site: " << EntryID << '\n';
+      BC.outs() << "\tInstr: 0x" << Twine::utohexstr(InstrLocation)
+                << "\n\tType:  0x" << Twine::utohexstr(Type) << "\n\tLen:   0x"
+                << Twine::utohexstr(Len) << '\n';
+    }
+
+    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstrLocation);
+    if (!BF && opts::Verbosity)
+      BC.outs() << "BOLT-INFO: no function matches address 0x"
+                << Twine::utohexstr(InstrLocation)
+                << " referenced by paravirtual patch site\n";
+
+    if (BF && BC.shouldEmit(*BF)) {
+      MCInst *Inst =
+          BF->getInstructionAtOffset(InstrLocation - BF->getAddress());
+      if (!Inst)
+        return createStringError(errc::executable_format_error,
+                                 "no instruction at address 0x%" PRIx64
+                                 " in paravirtual call site %" PRIu32,
+                                 InstrLocation, EntryID);
+      BC.MIB->addAnnotation(*Inst, "ParaSite", EntryID);
+    }
+  }
+
+  BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n";
+
+  return Error::success();
+}
+
 } // namespace
 
 std::unique_ptr<MetadataRewriter>
diff --git a/bolt/test/X86/linux-parainstructions.s b/bolt/test/X86/linux-parainstructions.s
new file mode 100644
index 00000000000000..4bdfde5fb7f24b
--- /dev/null
+++ b/bolt/test/X86/linux-parainstructions.s
@@ -0,0 +1,54 @@
+# REQUIRES: system-linux
+
+## Check that BOLT correctly parses the Linux kernel .parainstructions section.
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
+# RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
+
+## Verify paravirtual bindings to instructions.
+
+# RUN: llvm-bolt %t.exe --print-normalized -o %t.out | FileCheck %s
+
+# CHECK:      BOLT-INFO: Linux kernel binary detected
+# CHECK:      BOLT-INFO: parsed 2 paravirtual patch sites
+
+  .rodata
+fptr:
+  .quad 0
+
+  .text
+  .globl _start
+  .type _start, %function
+_start:
+# CHECK: Binary Function "_start"
+  nop
+.L1:
+  call *fptr(%rip)
+# CHECK:      call
+# CHECK-SAME: ParaSite: 1
+  nop
+.L2:
+  call *fptr(%rip)
+# CHECK:      call
+# CHECK-SAME: ParaSite: 2
+  ret
+  .size _start, .-_start
+
+
+## Paravirtual patch sites: one 8-byte aligned entry per call above.
+  .section .parainstructions,"a",@progbits
+
+  .balign 8
+  .quad .L1      # instruction
+  .byte 1        # type
+  .byte 7        # length
+
+  .balign 8
+  .quad .L2      # instruction
+  .byte 1        # type
+  .byte 7        # length
+
+## Fake Linux Kernel sections.
+  .section __ksymtab,"a",@progbits
+  .section __ksymtab_gpl,"a",@progbits



More information about the llvm-commits mailing list