[llvm] [Bolt] Fix address translation for KASLR kernel (PR #114261)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 19 16:52:38 PST 2024


https://github.com/xur-llvm updated https://github.com/llvm/llvm-project/pull/114261

>From 90016ff1832f18630053ec9bbf8ce13031841cd1 Mon Sep 17 00:00:00 2001
From: Rong Xu <xur at google.com>
Date: Tue, 29 Oct 2024 15:48:40 -0700
Subject: [PATCH 1/2] [Bolt] Fix address translation for KASLR kernel

This patch enables Bolt to analyze kernel addresses that
have been randomized by KASLR. It parses memory map (MMap)
entries within perf files to find the address mapping.
---
 bolt/lib/Core/BinaryContext.cpp      |  4 +++
 bolt/lib/Profile/DataAggregator.cpp  | 44 +++++++++++++++++-----------
 bolt/lib/Rewrite/RewriteInstance.cpp | 14 +++++++--
 3 files changed, 42 insertions(+), 20 deletions(-)

diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index f246750209d6c4..e23bda097543d6 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2024,6 +2024,10 @@ BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
     // Only consider executable segments.
     if (!SegInfo.IsExecutable)
       continue;
+    // For Linux kernel perf files, SegInfo.FileOffset and FileOffset are
+    // irrelevant.
+    if (IsLinuxKernel)
+      return MMapAddress - SegInfo.Address;
     // FileOffset is got from perf event,
     // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
     // If the pagesize is not equal to SegInfo.Alignment.
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index ffd693f9bbaed4..b31d661bed7562 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -530,26 +530,18 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   };
 
   if (BC.IsLinuxKernel) {
-    // Current MMap parsing logic does not work with linux kernel.
-    // MMap entries for linux kernel uses PERF_RECORD_MMAP
-    // format instead of typical PERF_RECORD_MMAP2 format.
-    // Since linux kernel address mapping is absolute (same as
-    // in the ELF file), we avoid parsing MMap in linux kernel mode.
-    // While generating optimized linux kernel binary, we may need
-    // to parse MMap entries.
-
     // In linux kernel mode, we analyze and optimize
     // all linux kernel binary instructions, irrespective
     // of whether they are due to system calls or due to
     // interrupts. Therefore, we cannot ignore interrupt
     // in Linux kernel mode.
     opts::IgnoreInterruptLBR = false;
-  } else {
-    prepareToParse("mmap events", MMapEventsPPI, ErrorCallback);
-    if (parseMMapEvents())
-      errs() << "PERF2BOLT: failed to parse mmap events\n";
   }
 
+  prepareToParse("mmap events", MMapEventsPPI, ErrorCallback);
+  if (parseMMapEvents())
+    errs() << "PERF2BOLT: failed to parse mmap events\n";
+
   prepareToParse("task events", TaskEventsPPI, ErrorCallback);
   if (parseTaskEvents())
     errs() << "PERF2BOLT: failed to parse task events\n";
@@ -1102,6 +1094,11 @@ ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
     return make_error_code(errc::no_such_process);
   }
 
+  if (BC->IsLinuxKernel) {
+    // "-1" is the pid for the Linux kernel
+    MMapInfoIter = BinaryMMapInfo.find(-1);
+  }
+
   while (checkAndConsumeFS()) {
   }
 
@@ -1936,7 +1933,8 @@ DataAggregator::parseMMapEvent() {
   }
   StringRef Line = ParsingBuf.substr(0, LineEnd);
 
-  size_t Pos = Line.find("PERF_RECORD_MMAP2");
+  // This would match both PERF_RECORD_MMAP and PERF_RECORD_MMAP2
+  size_t Pos = Line.find("PERF_RECORD_MMAP");
   if (Pos == StringRef::npos) {
     consumeRestOfLine();
     return std::make_pair(StringRef(), ParsedInfo);
@@ -1944,6 +1942,9 @@ DataAggregator::parseMMapEvent() {
 
   // Line:
   //   {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
+  // Or:
+  //   {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP <-1 | pid>/<tid>: .*
+  //   <file_name>
 
   const StringRef TimeStr =
       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
@@ -1954,9 +1955,14 @@ DataAggregator::parseMMapEvent() {
 
   // Line:
   //   PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
+  // Or:
+  //   PERF_RECORD_MMAP <-1 | pid>/<tid>: [<hexbase>(<hexsize>) .*]: .*
+  //   <file_name>
 
   StringRef FileName = Line.rsplit(FieldSeparator).second;
-  if (FileName.starts_with("//") || FileName.starts_with("[")) {
+  if (FileName == "[kernel.kallsyms]_text")
+    FileName = "[kernel.kallsyms]";
+  else if (FileName.starts_with("//") || FileName.starts_with("[")) {
     consumeRestOfLine();
     return std::make_pair(StringRef(), ParsedInfo);
   }
@@ -1983,8 +1989,11 @@ DataAggregator::parseMMapEvent() {
     return make_error_code(llvm::errc::io_error);
   }
 
-  const StringRef OffsetStr =
-      Line.split('@').second.ltrim().split(FieldSeparator).first;
+  const StringRef OffsetStr = Line.split('@')
+                                  .second.ltrim()
+                                  .split(FieldSeparator)
+                                  .first.split(']')
+                                  .first;
   if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
     reportError("expected mmaped page-aligned offset");
     Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
@@ -2008,7 +2017,8 @@ std::error_code DataAggregator::parseMMapEvents() {
       return EC;
 
     std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
-    if (FileMMapInfo.second.PID == -1)
+    if (FileMMapInfo.first != "[kernel.kallsyms]" &&
+        FileMMapInfo.second.PID == -1)
       continue;
     if (FileMMapInfo.first == "(deleted)")
       continue;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 32ec7abe8b666a..ee88f04b5504da 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -530,8 +530,11 @@ Error RewriteInstance::discoverStorage() {
           Phdr.p_vaddr,  Phdr.p_memsz, Phdr.p_offset,
           Phdr.p_filesz, Phdr.p_align, ((Phdr.p_flags & ELF::PF_X) != 0)};
       if (BC->TheTriple->getArch() == llvm::Triple::x86_64 &&
-          Phdr.p_vaddr >= BinaryContext::KernelStartX86_64)
+          Phdr.p_vaddr >= BinaryContext::KernelStartX86_64) {
         BC->IsLinuxKernel = true;
+        BC->HasFixedLoadAddress = false;
+      }
+
       break;
     case ELF::PT_INTERP:
       BC->HasInterpHeader = true;
@@ -995,8 +998,13 @@ void RewriteInstance::discoverFileObjects() {
     }
 
     if (!Section->isText()) {
-      assert(SymbolType != SymbolRef::ST_Function &&
-             "unexpected function inside non-code section");
+      // In the kernel, a function can live in a non-text section. For
+      // example, lkdtm_rodata_do_nothing() in
+      // ./drivers/misc/lkdtm/rodata.c is in the rodata section.
+      if (!BC->IsLinuxKernel) {
+        assert(SymbolType != SymbolRef::ST_Function &&
+               "unexpected function inside non-code section");
+      }
       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n");
       registerName(SymbolSize);
       continue;

>From 1d1b33407d7129f949a0d1e8b64c13cd490e3596 Mon Sep 17 00:00:00 2001
From: Rong Xu <xur at google.com>
Date: Thu, 19 Dec 2024 16:51:05 -0800
Subject: [PATCH 2/2] [bolt] Use a fixed name for the kernel image

Use a fixed name for the kernel image when processing kernel profiles,
regardless of whether a build ID is present.

This addresses the case where the provided kernel image lacks a
matching build ID. The name "[kernel.kallsyms]" is the default
for kernel DSOs in the Linux kernel source code
(see https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/dso.c#n428).

While "[guest.kernel.kallsyms]" is the kernel DSO name for guest kernels,
support for VM profiles is currently limited, so this name is
skipped for now.
---
 bolt/lib/Profile/DataAggregator.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index e2b7038f42e6c7..3ee80db6f4e082 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2097,6 +2097,8 @@ std::error_code DataAggregator::parseMMapEvents() {
            << "\" for profile matching\n";
     NameToUse = BuildIDBinaryName;
   }
+  if (BC->IsLinuxKernel)
+    NameToUse = "[kernel.kallsyms]";
 
   auto Range = GlobalMMapInfo.equal_range(NameToUse);
   for (MMapInfo &MMapInfo : llvm::make_second_range(make_range(Range))) {
@@ -2145,7 +2147,7 @@ std::error_code DataAggregator::parseMMapEvents() {
     // Update mapping size.
     const uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size;
     const uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress;
-    if (Size > BinaryMMapInfo[MMapInfo.PID].Size)
+    if (!BC->IsLinuxKernel && Size > BinaryMMapInfo[MMapInfo.PID].Size)
       BinaryMMapInfo[MMapInfo.PID].Size = Size;
   }
 



More information about the llvm-commits mailing list