[llvm] [BOLT] Detect Linux kernel based on ELF program headers (PR #80086)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 30 16:53:15 PST 2024


https://github.com/maksfb created https://github.com/llvm/llvm-project/pull/80086

Check if program header addresses fall into the kernel space to detect a Linux kernel binary on x86-64.

Delete opts::LinuxKernelMode and use BinaryContext::IsLinuxKernel instead.

From 8bbda67556e13c7c0465defc80d9100a28c5dd00 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Tue, 30 Jan 2024 16:22:55 -0800
Subject: [PATCH] [BOLT] Detect Linux kernel based on ELF program headers

Check if program header addresses fall into the kernel space to detect
a Linux kernel binary on x86-64.

Delete opts::LinuxKernelMode and use BinaryContext::IsLinuxKernel
instead.
---
 bolt/include/bolt/Core/BinaryContext.h    |  3 +++
 bolt/include/bolt/Utils/CommandLineOpts.h |  1 -
 bolt/lib/Profile/DataAggregator.cpp       |  6 +++---
 bolt/lib/Rewrite/LinuxKernelRewriter.cpp  |  9 ++++-----
 bolt/lib/Rewrite/RewriteInstance.cpp      | 16 ++++++++++------
 bolt/lib/Utils/CommandLineOpts.cpp        |  1 -
 bolt/test/X86/linux-orc.s                 | 10 ++--------
 7 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index f0e7a8272ad0e..65a3442695703 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -602,6 +602,9 @@ class BinaryContext {
 
   std::unique_ptr<MCAsmBackend> MAB;
 
+  /// Indicates if the binary is Linux kernel.
+  bool IsLinuxKernel{false};
+
   /// Indicates if relocations are available for usage.
   bool HasRelocations{false};
 
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index 7b654f19f6d45..30e8bd777b3ca 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -18,7 +18,6 @@
 namespace opts {
 
 extern bool HeatmapMode;
-extern bool LinuxKernelMode;
 
 extern llvm::cl::OptionCategory BoltCategory;
 extern llvm::cl::OptionCategory BoltDiffCategory;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index be1e348b338f0..6a64bcde911e6 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -524,7 +524,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
       ErrorCallback(ReturnCode, ErrBuf);
   };
 
-  if (opts::LinuxKernelMode) {
+  if (BC.IsLinuxKernel) {
     // Current MMap parsing logic does not work with linux kernel.
     // MMap entries for linux kernel uses PERF_RECORD_MMAP
     // format instead of typical PERF_RECORD_MMAP2 format.
@@ -1056,7 +1056,7 @@ ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
   if (std::error_code EC = PIDRes.getError())
     return EC;
   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
-  if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
+  if (!BC->IsLinuxKernel && MMapInfoIter == BinaryMMapInfo.end()) {
     consumeRestOfLine();
     return make_error_code(errc::no_such_process);
   }
@@ -1277,7 +1277,7 @@ std::error_code DataAggregator::printLBRHeatMap() {
   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
                      TimerGroupDesc, opts::TimeAggregator);
 
-  if (opts::LinuxKernelMode) {
+  if (BC->IsLinuxKernel) {
     opts::HeatmapMaxAddress = 0xffffffffffffffff;
     opts::HeatmapMinAddress = KernelBaseAddr;
   }
diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index d74177c1c4614..c8674d6b837ad 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -207,9 +207,7 @@ void LinuxKernelRewriter::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
 }
 
 void LinuxKernelRewriter::processLKSections() {
-  assert(opts::LinuxKernelMode &&
-         "process Linux Kernel special sections and their relocations only in "
-         "linux kernel mode.\n");
+  assert(BC.IsLinuxKernel && "Linux kernel binary expected.");
 
   processLKExTable();
   processLKPCIFixup();
@@ -290,8 +288,9 @@ void LinuxKernelRewriter::processLKExTable() {
 void LinuxKernelRewriter::processLKPCIFixup() {
   ErrorOr<BinarySection &> SectionOrError =
       BC.getUniqueSectionByName(".pci_fixup");
-  assert(SectionOrError &&
-         ".pci_fixup section not found in Linux Kernel binary");
+  if (!SectionOrError)
+    return;
+
   const uint64_t SectionSize = SectionOrError->getSize();
   const uint64_t SectionAddress = SectionOrError->getAddress();
   assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16");
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index d24bd18c08409..0645308afafd5 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -512,6 +512,9 @@ Error RewriteInstance::discoverStorage() {
                                                      Phdr.p_offset,
                                                      Phdr.p_filesz,
                                                      Phdr.p_align};
+      if (BC->TheTriple->getArch() == llvm::Triple::x86_64 &&
+          Phdr.p_vaddr >= 0xFFFF'FFFF'8000'0000)
+        BC->IsLinuxKernel = true;
       break;
     case ELF::PT_INTERP:
       BC->HasInterpHeader = true;
@@ -519,6 +522,9 @@ Error RewriteInstance::discoverStorage() {
     }
   }
 
+  if (BC->IsLinuxKernel)
+    outs() << "BOLT-INFO: Linux kernel binary detected\n";
+
   for (const SectionRef &Section : InputFile->sections()) {
     Expected<StringRef> SectionNameOrErr = Section.getName();
     if (Error E = SectionNameOrErr.takeError())
@@ -562,7 +568,7 @@ Error RewriteInstance::discoverStorage() {
   if (opts::Hugify && !BC->HasFixedLoadAddress)
     NextAvailableAddress += BC->PageAlign;
 
-  if (!opts::UseGnuStack) {
+  if (!opts::UseGnuStack && !BC->IsLinuxKernel) {
     // This is where the black magic happens. Creating PHDR table in a segment
     // other than that containing ELF header is tricky. Some loaders and/or
     // parts of loaders will apply e_phoff from ELF header assuming both are in
@@ -751,7 +757,7 @@ Error RewriteInstance::run() {
   if (opts::Instrument && !BC->IsStaticExecutable)
     updateRtFiniReloc();
 
-  if (opts::LinuxKernelMode) {
+  if (BC->IsLinuxKernel) {
     errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n";
     return Error::success();
   } else if (opts::OutputFilename == "/dev/null") {
@@ -1284,7 +1290,7 @@ void RewriteInstance::discoverFileObjects() {
     }
   }
 
-  if (!opts::LinuxKernelMode) {
+  if (!BC->IsLinuxKernel) {
     // Read all relocations now that we have binary functions mapped.
     processRelocations();
   }
@@ -1813,8 +1819,6 @@ Error RewriteInstance::readSpecialSections() {
                << "\n");
     if (isDebugSection(SectionName))
       HasDebugInfo = true;
-    if (isKSymtabSection(SectionName))
-      opts::LinuxKernelMode = true;
   }
 
   // Set IsRelro section attribute based on PT_GNU_RELRO segment.
@@ -3037,7 +3041,7 @@ void RewriteInstance::preprocessProfileData() {
 }
 
 void RewriteInstance::initializeMetadataManager() {
-  if (opts::LinuxKernelMode)
+  if (BC->IsLinuxKernel)
     MetadataManager.registerRewriter(createLinuxKernelRewriter(*BC));
 
   MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index a1df5de262340..e910fa4f86722 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -29,7 +29,6 @@ const char *BoltRevision =
 namespace opts {
 
 bool HeatmapMode = false;
-bool LinuxKernelMode = false;
 
 cl::OptionCategory BoltCategory("BOLT generic options");
 cl::OptionCategory BoltDiffCategory("BOLTDIFF generic options");
diff --git a/bolt/test/X86/linux-orc.s b/bolt/test/X86/linux-orc.s
index bb3ab10754544..3d8a3d77ec2ea 100644
--- a/bolt/test/X86/linux-orc.s
+++ b/bolt/test/X86/linux-orc.s
@@ -3,11 +3,12 @@
 ## Check that BOLT correctly reads ORC unwind information used by Linux kernel.
 
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
-# RUN: %clang %cflags %t.o -o %t.exe
+# RUN: %clang %cflags -nostdlib %t.o -o %t.exe -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr
 
 # RUN: llvm-bolt %t.exe --print-normalized --dump-orc --print-orc -o %t.out \
 # RUN:   |& FileCheck %s
 
+# CHECK:      BOLT-INFO: Linux kernel binary detected
 # CHECK: 			BOLT-INFO: ORC unwind information:
 # CHECK-NEXT: {sp: 8, bp: 0, info: 0x5}: _start
 # CHECK-NEXT: {sp: 0, bp: 0, info: 0x0}: _start
@@ -22,18 +23,15 @@
   .globl _start
   .type _start, %function
 _start:
-  .cfi_startproc
 
   call foo
 # CHECK:      callq foo           # ORC: {sp: 8, bp: 0, info: 0x5}
   ret
-  .cfi_endproc
   .size _start, .-_start
 
   .globl foo
   .type foo, %function
 foo:
-  .cfi_startproc
   push %rbp
 # CHECK:      pushq   %rbp        # ORC: {sp: 8, bp: 0, info: 0x5}
 .L1:
@@ -45,16 +43,13 @@ foo:
 .L3:
   ret
 # CHECK:      retq                # ORC: {sp: 8, bp: 0, info: 0x5}
-  .cfi_endproc
   .size foo, .-foo
 
 bar:
-  .cfi_startproc
 	ret
 ## Same ORC info propagated from foo above.
 # CHECK:      retq                # ORC: {sp: 8, bp: 0, info: 0x5}
 .L4:
-  .cfi_endproc
   .size bar, .-bar
 
   .section .orc_unwind,"a",@progbits
@@ -131,4 +126,3 @@ bar:
 ## Fake Linux Kernel sections.
   .section __ksymtab,"a",@progbits
   .section __ksymtab_gpl,"a",@progbits
-  .section .pci_fixup,"a",@progbits



More information about the llvm-commits mailing list