[llvm] [BOLT] Add reading support for Linux kernel .altinstructions section (PR #84283)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 7 11:31:51 PST 2024


https://github.com/maksfb updated https://github.com/llvm/llvm-project/pull/84283

>From 433e5f041a8131bf30ad6e9e793f894085eed055 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Wed, 6 Mar 2024 23:19:22 -0800
Subject: [PATCH 1/2] [BOLT] Add reading support for Linux kernel
 .altinstructions section

Read .altinstructions and annotate instructions that have alternative
sequences with "AltInst" annotation. Note that some instructions may
have more than one alternative, in which case they will have multiple
annotations in the form "AltInst", "AltInst2", "AltInst3", etc.
---
 bolt/lib/Rewrite/LinuxKernelRewriter.cpp | 141 +++++++++++++++++++++++
 1 file changed, 141 insertions(+)

diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index 964a47346592fc..847e12611d0867 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -27,6 +27,21 @@ using namespace bolt;
 
 namespace opts {
 
+static cl::opt<bool>
+    AltInstHasPadLen("alt-inst-has-padlen",
+                     cl::desc("specify that .altinstructions has padlen field"),
+                     cl::init(false), cl::Hidden, cl::cat(BoltCategory));
+
+static cl::opt<uint32_t>
+    AltInstFeatureSize("alt-inst-feature-size",
+                       cl::desc("size of feature field in .altinstructions"),
+                       cl::init(2), cl::Hidden, cl::cat(BoltCategory));
+
+static cl::opt<bool>
+    DumpAltInstructions("dump-alt-instructions",
+                        cl::desc("dump Linux alernative instructions info"),
+                        cl::init(false), cl::Hidden, cl::cat(BoltCategory));
+
 static cl::opt<bool>
     DumpExceptions("dump-linux-exceptions",
                    cl::desc("dump Linux kernel exception table"),
@@ -157,6 +172,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
   /// Alignment of paravirtual patch structures.
   static constexpr size_t PARA_PATCH_ALIGN = 8;
 
+  /// .altinstructions section.
+  ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address;
+
   /// Section containing Linux bug table.
   ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address;
 
@@ -205,6 +223,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
 
   Error readBugTable();
 
+  /// Read alternative instruction info from .altinstructions.
+  Error readAltInstructions();
+
   /// Mark instructions referenced by kernel metadata.
   Error markInstructions();
 
@@ -232,6 +253,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
     if (Error E = readBugTable())
       return E;
 
+    if (Error E = readAltInstructions())
+      return E;
+
     return Error::success();
   }
 
@@ -1132,6 +1156,123 @@ Error LinuxKernelRewriter::readBugTable() {
   return Error::success();
 }
 
+/// The kernel can replace certain instruction sequences depending on the
+/// hardware it is running on and the features specified at boot time. The
+/// alternative sequences are described in the .altinstructions section using
+/// entries defined in arch/x86/include/asm/alternative.h:
+///
+///   struct alt_instr {
+///     s32 instr_offset;   // relative to the address of this field
+///     s32 repl_offset;    // relative to the address of this field
+///     uXX feature;        // size set by --alt-inst-feature-size
+///     u8  instrlen;
+///     u8  replacementlen;
+///     u8  padlen;         // older kernels only (--alt-inst-has-padlen)
+///   } __packed;
+///
+/// The structure is packed. Instructions covered by an entry are annotated
+/// with the 1-based entry ID; malformed entries produce an error.
+Error LinuxKernelRewriter::readAltInstructions() {
+  AltInstrSection = BC.getUniqueSectionByName(".altinstructions");
+  if (!AltInstrSection)
+    return Error::success();
+
+  const uint64_t Address = AltInstrSection->getAddress();
+  DataExtractor DE = DataExtractor(AltInstrSection->getContents(),
+                                   BC.AsmInfo->isLittleEndian(),
+                                   BC.AsmInfo->getCodePointerSize());
+  uint64_t EntryID = 0;
+  DataExtractor::Cursor Cursor(0);
+  while (Cursor && !DE.eof(Cursor)) {
+    // Each offset is relative to the address of its own field. Capture that
+    // address first: the operand evaluation order of '+' is unspecified.
+    const uint64_t OrgFieldAddr = Address + Cursor.tell();
+    const uint64_t OrgInstAddress = OrgFieldAddr + (int32_t)DE.getU32(Cursor);
+    const uint64_t AltFieldAddr = Address + Cursor.tell();
+    const uint64_t AltInstAddress = AltFieldAddr + (int32_t)DE.getU32(Cursor);
+    const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize);
+    const uint8_t OrgSize = DE.getU8(Cursor);
+    const uint8_t AltSize = DE.getU8(Cursor);
+    // Older kernels may have the padlen field.
+    const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(Cursor) : 0;
+
+    if (!Cursor)
+      return createStringError(errc::executable_format_error,
+                               "out of bounds while reading .altinstructions");
+
+    ++EntryID;
+
+    if (opts::DumpAltInstructions) {
+      BC.outs() << "Alternative instruction entry: " << EntryID
+                << "\n\tOrg:     0x" << Twine::utohexstr(OrgInstAddress)
+                << "\n\tAlt:     0x" << Twine::utohexstr(AltInstAddress)
+                << "\n\tFeature: 0x" << Twine::utohexstr(Feature)
+                << "\n\tOrgSize: " << (int)OrgSize
+                << "\n\tAltSize: " << (int)AltSize << '\n';
+      if (opts::AltInstHasPadLen)
+        BC.outs() << "\tPadLen:  " << (int)PadLen << '\n';
+    }
+
+    if (AltSize > OrgSize)
+      return createStringError(errc::executable_format_error,
+                               "error reading .altinstructions");
+
+    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress);
+    if (!BF && opts::Verbosity) {
+      BC.outs() << "BOLT-INFO: no function matches address 0x"
+                << Twine::utohexstr(OrgInstAddress)
+                << " of instruction from .altinstructions\n";
+    }
+
+    BinaryFunction *AltBF =
+        BC.getBinaryFunctionContainingAddress(AltInstAddress);
+    if (AltBF && BC.shouldEmit(*AltBF)) {
+      BC.errs()
+          << "BOLT-WARNING: alternative instruction sequence found in function "
+          << *AltBF << '\n';
+      AltBF->setIgnored();
+    }
+
+    if (!BF || !BC.shouldEmit(*BF))
+      continue;
+
+    if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize())
+      return createStringError(errc::executable_format_error,
+                               "error reading .altinstructions");
+
+    MCInst *Inst =
+        BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress());
+    if (!Inst)
+      return createStringError(errc::executable_format_error,
+                               "no instruction at address 0x%" PRIx64
+                               " referenced by .altinstructions entry %" PRIu64,
+                               OrgInstAddress, EntryID);
+
+    // There could be more than one alternative instruction sequence for the
+    // same original instruction. Annotate each alternative separately.
+    std::string AnnotationName = "AltInst";
+    unsigned N = 2;
+    while (BC.MIB->hasAnnotation(*Inst, AnnotationName))
+      AnnotationName = "AltInst" + std::to_string(N++);
+
+    BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID);
+
+    // Annotate the rest of the original sequence. Probing every byte offset is
+    // inefficient, but alternative instructions are uncommon, so it will do.
+    for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) {
+      Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset -
+                                        BF->getAddress());
+      if (Inst)
+        BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID);
+    }
+  }
+
+  BC.outs() << "BOLT-INFO: parsed " << EntryID
+            << " alternative instruction entries\n";
+
+  return Error::success();
+}
+
 } // namespace
 
 std::unique_ptr<MetadataRewriter>

>From 100df3d6a39bb87a6443c3c61c36eab233825a74 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Thu, 7 Mar 2024 11:21:59 -0800
Subject: [PATCH 2/2] fixup! [BOLT] Add reading support for Linux kernel
 .altinstructions section

---
 bolt/lib/Rewrite/LinuxKernelRewriter.cpp |  2 +-
 bolt/test/X86/linux-alt-instruction.s    | 92 ++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/X86/linux-alt-instruction.s

diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index 847e12611d0867..ecfbea3cb51185 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -39,7 +39,7 @@ static cl::opt<uint32_t>
 
 static cl::opt<bool>
     DumpAltInstructions("dump-alt-instructions",
-                        cl::desc("dump Linux alernative instructions info"),
+                        cl::desc("dump Linux alternative instructions info"),
                         cl::init(false), cl::Hidden, cl::cat(BoltCategory));
 
 static cl::opt<bool>
diff --git a/bolt/test/X86/linux-alt-instruction.s b/bolt/test/X86/linux-alt-instruction.s
new file mode 100644
index 00000000000000..96e77545b654bc
--- /dev/null
+++ b/bolt/test/X86/linux-alt-instruction.s
@@ -0,0 +1,92 @@
+# REQUIRES: system-linux
+
+## Check that BOLT correctly parses the Linux kernel .altinstructions section
+## and annotates alternative instructions.
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
+# RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
+# RUN: llvm-bolt %t.exe --print-normalized --keep-nops -o %t.out \
+# RUN:   --alt-inst-feature-size=2 | FileCheck %s
+
+## Older kernels used to have padlen field in alt_instr. Check compatibility.
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --defsym PADLEN=1 \
+# RUN:   %s -o %t.o
+# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
+# RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
+# RUN: llvm-bolt %t.exe --print-normalized --keep-nops --alt-inst-has-padlen \
+# RUN:   -o %t.out | FileCheck %s
+
+## Check with a larger size of "feature" field in alt_instr.
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
+# RUN:   --defsym FEATURE_SIZE_4=1 %s -o %t.o
+# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
+# RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
+# RUN: llvm-bolt %t.exe --print-normalized --keep-nops \
+# RUN:   --alt-inst-feature-size=4 -o %t.out | FileCheck %s
+
+# CHECK:      BOLT-INFO: Linux kernel binary detected
+# CHECK:      BOLT-INFO: parsed 2 alternative instruction entries
+
+  .text
+  .globl _start
+  .type _start, %function
+_start:
+# CHECK: Binary Function "_start"
+.L0:
+  rdtsc
+# CHECK:      rdtsc
+# CHECK-SAME: AltInst: 1
+# CHECK-SAME: AltInst2: 2
+  nop
+# CHECK-NEXT: nop
+# CHECK-SAME: AltInst: 1
+# CHECK-SAME: AltInst2: 2
+  nop
+  nop
+.L1:
+  ret
+  .size _start, .-_start
+
+  .section .altinstr_replacement,"ax",@progbits
+.A0:
+  lfence
+  rdtsc
+.A1:
+  rdtscp
+.Ae:
+
+## Alternative instruction info.
+  .section .altinstructions,"a",@progbits
+
+  .long .L0 - .   # org instruction
+  .long .A0 - .   # alt instruction
+.ifdef FEATURE_SIZE_4
+  .long 0x72      # feature flags
+.else
+  .word 0x72      # feature flags
+.endif
+  .byte .L1 - .L0 # org size
+  .byte .A1 - .A0 # alt size
+.ifdef PADLEN
+  .byte 0
+.endif
+
+  .long .L0 - .   # org instruction
+  .long .A1 - .   # alt instruction
+.ifdef FEATURE_SIZE_4
+  .long 0x3b      # feature flags
+.else
+  .word 0x3b      # feature flags
+.endif
+  .byte .L1 - .L0 # org size
+  .byte .Ae - .A1 # alt size
+.ifdef PADLEN
+  .byte 0
+.endif
+
+## Fake Linux Kernel sections.
+  .section __ksymtab,"a",@progbits
+  .section __ksymtab_gpl,"a",@progbits



More information about the llvm-commits mailing list