[llvm] [BOLT][AArch64] Include constant islands in disassembly (PR #125961)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 5 16:12:35 PST 2025


https://github.com/maksfb updated https://github.com/llvm/llvm-project/pull/125961

>From f8fd35da79b4bf34d51c36d5e60bc23c4c0c5c35 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Wed, 5 Feb 2025 15:04:42 -0800
Subject: [PATCH] [BOLT] Include constant islands in disassembly

When printing disassembly of a function with constant islands, include
the island info in the dump.

At the moment, only print islands in pre-CFG state. Include islands that
are interleaved with instructions.
---
 bolt/include/bolt/Core/BinaryContext.h  | 11 ++++++++
 bolt/include/bolt/Core/BinaryFunction.h |  5 ++++
 bolt/lib/Core/BinaryContext.cpp         | 37 +++++++++++++++++++++++++
 bolt/lib/Core/BinaryFunction.cpp        | 34 +++++++++++++++++++++++
 bolt/test/AArch64/data-in-code.s        | 31 +++++++++++++++++++++
 5 files changed, 118 insertions(+)
 create mode 100644 bolt/test/AArch64/data-in-code.s

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 94fe4aa8aa0e57f..8bec1db70e25a2c 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -1435,6 +1435,17 @@ class BinaryContext {
                         bool PrintRelocations = false,
                         StringRef Endl = "\n") const;
 
+  /// Print data when embedded in the instruction stream keeping the format
+  /// similar to printInstruction().
+  void printData(raw_ostream &OS, ArrayRef<uint8_t> Data,
+                 uint64_t Offset) const;
+
+  /// Extract data from the binary corresponding to [Address, Address + Size)
+  /// range. Return an empty ArrayRef if the address range does not belong to
+  /// any section in the binary, crosses a section boundary, or falls into a
+  /// virtual section.
+  ArrayRef<uint8_t> extractData(uint64_t Address, uint64_t Size) const;
+
   /// Print a range of instructions.
   template <typename Itr>
   uint64_t
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index e8b2757f7db215e..942840a7621fd97 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2060,6 +2060,11 @@ class BinaryFunction {
     return Islands ? Islands->getAlignment() : 1;
   }
 
+  /// If there is a constant island in the range [StartOffset, EndOffset),
+  /// return its offset from the start of the function.
+  std::optional<uint64_t> getIslandInRange(uint64_t StartOffset,
+                                           uint64_t EndOffset) const;
+
   uint64_t
   estimateConstantIslandSize(const BinaryFunction *OnBehalfOf = nullptr) const {
     if (!Islands)
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index f5e11358daaa32b..1327cbc6a62ba41 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1942,6 +1942,43 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
     OS << " discriminator:" << Row.Discriminator;
 }
 
+ArrayRef<uint8_t> BinaryContext::extractData(uint64_t Address,
+                                             uint64_t Size) const {
+  // Return an empty ArrayRef unless the range lies in one non-virtual section.
+  const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
+  if (!Section || Section->isVirtual())
+    return ArrayRef<uint8_t>();
+  if (!Section->containsRange(Address, Size))
+    return ArrayRef<uint8_t>();
+  // Compute the section-relative offset first so that the pointer arithmetic
+  // below never forms an intermediate address outside the section contents.
+  const uint64_t SectionOffset = Address - Section->getAddress();
+  auto *Bytes =
+      reinterpret_cast<const uint8_t *>(Section->getContents().data());
+  return ArrayRef<uint8_t>(Bytes + SectionOffset, Size);
+}
+
+void BinaryContext::printData(raw_ostream &OS, ArrayRef<uint8_t> Data,
+                              uint64_t Offset) const {
+  // Emit 4-byte words first; a 1-3 byte tail becomes a .short and/or a .byte.
+  DataExtractor Extractor(Data, AsmInfo->isLittleEndian(),
+                          AsmInfo->getCodePointerSize());
+  uint64_t Pos = 0;
+  while (Data.size() - Pos >= 4) {
+    OS << format("    %08" PRIx64 ": \t.word\t0x", Offset + Pos);
+    const uint64_t Word = Extractor.getUnsigned(&Pos, 4);
+    OS << Twine::utohexstr(Word) << '\n';
+  }
+  if (Data.size() - Pos >= 2) {
+    OS << format("    %08" PRIx64 ": \t.short\t0x", Offset + Pos);
+    const uint64_t Short = Extractor.getUnsigned(&Pos, 2);
+    OS << Twine::utohexstr(Short) << '\n';
+  }
+  if (Data.size() - Pos == 1)
+    OS << format("    %08" PRIx64 ": \t.byte\t0x%x\n", Offset + Pos,
+                 Data[Pos]);
+}
+
 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
                                      uint64_t Offset,
                                      const BinaryFunction *Function,
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 88562a60dd50da2..317ade95c51f30d 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -491,11 +491,27 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
   // Offset of the instruction in function.
   uint64_t Offset = 0;
 
+  // Print the constant island, if any, that starts in [Start, End). Both
+  // bounds are offsets from the function start, not an (offset, size) pair.
+  auto printConstantIslandInRange = [&](uint64_t Start, uint64_t End) {
+    const std::optional<uint64_t> IslandOffset = getIslandInRange(Start, End);
+    if (!IslandOffset)
+      return;
+
+    const size_t IslandSize = getSizeOfDataInCodeAt(*IslandOffset);
+    BC.printData(OS, BC.extractData(getAddress() + *IslandOffset, IslandSize),
+                 *IslandOffset);
+  };
+
   if (BasicBlocks.empty() && !Instructions.empty()) {
     // Print before CFG was built.
+    uint64_t PrevOffset = 0;
     for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
       Offset = II.first;
 
+      // Print any constant islands in between the instructions.
+      printConstantIslandInRange(PrevOffset, Offset);
+
       // Print label if exists at this offset.
       auto LI = Labels.find(Offset);
       if (LI != Labels.end()) {
@@ -506,7 +522,12 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
       }
 
       BC.printInstruction(OS, II.second, Offset, this);
+
+      PrevOffset = Offset;
     }
+
+    // Print any data at the end of the function.
+    printConstantIslandInRange(PrevOffset, getMaxSize());
   }
 
   StringRef SplitPointMsg = "";
@@ -1048,6 +1069,19 @@ size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
   return getSize() - Offset;
 }
 
+std::optional<uint64_t>
+BinaryFunction::getIslandInRange(uint64_t StartOffset,
+                                 uint64_t EndOffset) const {
+  if (!Islands)
+    return std::nullopt;
+  // lower_bound yields the first data offset not less than StartOffset.
+  const auto &Offsets = Islands->DataOffsets;
+  const auto Candidate = llvm::lower_bound(Offsets, StartOffset);
+  if (Candidate == Offsets.end() || *Candidate >= EndOffset)
+    return std::nullopt;
+  return *Candidate;
+}
+
 bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
   ArrayRef<uint8_t> FunctionData = *getData();
   uint64_t EndOfCode = getSize();
diff --git a/bolt/test/AArch64/data-in-code.s b/bolt/test/AArch64/data-in-code.s
new file mode 100644
index 000000000000000..8d3179a0c33508e
--- /dev/null
+++ b/bolt/test/AArch64/data-in-code.s
@@ -0,0 +1,31 @@
+## Check that llvm-bolt prints data embedded in code.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -fno-PIC -no-pie %t.o -o %t.exe -nostdlib \
+# RUN:    -fuse-ld=lld -Wl,-q
+
+## Check disassembly of BOLT input.
+# RUN: llvm-objdump %t.exe -d | FileCheck %s
+
+# RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm | FileCheck %s
+
+.text
+.balign 4
+
+.global _start
+.type _start, %function
+_start:
+  mov x0, #0x0
+  .word 0x4f82e010
+  ret
+  .byte 0x0, 0xff, 0x42
+# CHECK-LABEL: _start
+# CHECK:        mov x0, #0x0
+# CHECK-NEXT:   .word 0x4f82e010
+# CHECK-NEXT:   ret
+# CHECK-NEXT:   .short 0xff00
+# CHECK-NEXT:   .byte 0x42
+.size _start, .-_start
+
+## Force relocation mode.
+  .reloc 0, R_AARCH64_NONE



More information about the llvm-commits mailing list