[llvm] [BOLT][AArch64] Include constant islands in disassembly (PR #125961)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 5 15:49:18 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Maksim Panchenko (maksfb)
<details>
<summary>Changes</summary>
When printing disassembly of a function with constant islands, include the island info in the dump.
At the moment, only print islands in pre-CFG state. Include islands that are interleaved with instructions.
---
Full diff: https://github.com/llvm/llvm-project/pull/125961.diff
5 Files Affected:
- (modified) bolt/include/bolt/Core/BinaryContext.h (+10)
- (modified) bolt/include/bolt/Core/BinaryFunction.h (+5)
- (modified) bolt/lib/Core/BinaryContext.cpp (+36)
- (modified) bolt/lib/Core/BinaryFunction.cpp (+33)
- (added) bolt/test/AArch64/data-in-code.s (+31)
``````````diff
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 94fe4aa8aa0e57..d75fa5aa8a6ce1 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -1435,6 +1435,16 @@ class BinaryContext {
bool PrintRelocations = false,
StringRef Endl = "\n") const;
+ /// Print data when embedded in the instruction stream keeping the format
+ /// similar to printInstruction().
+ void printData(raw_ostream &OS, ArrayRef<uint8_t> Data, uint64_t Offset) const;
+
+ /// Extract data from the binary corresponding to [Address, Address + Size)
+ /// range. Return an empty ArrayRef if the address range does not belong to
+ /// any section in the binary, crosses a section boundary, or falls into a
+ /// virtual section.
+ ArrayRef<uint8_t> extractData(uint64_t Address, uint64_t Size) const;
+
/// Print a range of instructions.
template <typename Itr>
uint64_t
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index e8b2757f7db215..18a2cad3cda123 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2060,6 +2060,11 @@ class BinaryFunction {
return Islands ? Islands->getAlignment() : 1;
}
+ /// If a constant island starts in the range [StartOffset, EndOffset), return
+ /// the offset (relative to the function start) of the first such island.
+ std::optional<uint64_t>
+ getIslandInRange(uint64_t StartOffset, uint64_t EndOffset) const;
+
uint64_t
estimateConstantIslandSize(const BinaryFunction *OnBehalfOf = nullptr) const {
if (!Islands)
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index f5e11358daaa32..8fcab1dfdec3fd 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1942,6 +1942,42 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
OS << " discriminator:" << Row.Discriminator;
}
+ArrayRef<uint8_t>
+BinaryContext::extractData(uint64_t Address, uint64_t Size) const { // Bytes at [Address, Address + Size), or an empty ArrayRef on failure.
+ ArrayRef<uint8_t> Res; // Empty result returned by every failure path below.
+
+ const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
+ if (!Section || Section->isVirtual()) // No section covers Address, or the section is virtual.
+ return Res;
+
+ if (!Section->containsRange(Address, Size)) // The requested range is not fully inside this section.
+ return Res;
+
+ auto *Bytes =
+ reinterpret_cast<const uint8_t *>(Section->getContents().data());
+ return ArrayRef<uint8_t>(Bytes + Address - Section->getAddress(), Size); // Non-owning view into the section contents, starting at Address.
+}
+
+void BinaryContext::printData(raw_ostream &OS, ArrayRef<uint8_t> Data,
+ uint64_t Offset) const { // Offset is the value printed for Data[0]; later entries print Offset + DataOffset.
+ DataExtractor DE(Data, AsmInfo->isLittleEndian(), AsmInfo->getCodePointerSize()); // Decoder over Data using the endianness and pointer size reported by AsmInfo.
+ uint64_t DataOffset = 0; // Read cursor into Data; passed by pointer to DE.getUnsigned(), which advances it.
+ while (DataOffset + 4 <= Data.size()) { // Emit as many 4-byte .word entries as fit.
+ OS << format(" %08" PRIx64 ": \t.word\t0x", Offset + DataOffset);
+ const auto Word = DE.getUnsigned(&DataOffset, 4);
+ OS << Twine::utohexstr(Word) << '\n';
+ }
+ if (DataOffset + 2 <= Data.size()) { // Fewer than 4 bytes remain: emit at most one 2-byte .short.
+ OS << format(" %08" PRIx64 ": \t.short\t0x", Offset + DataOffset);
+ const auto Short = DE.getUnsigned(&DataOffset, 2);
+ OS << Twine::utohexstr(Short) << '\n';
+ }
+ if (DataOffset + 1 == Data.size()) { // Exactly one byte is left: emit it as a .byte.
+ OS << format(" %08" PRIx64 ": \t.byte\t0x%x\n", Offset + DataOffset,
+ Data[DataOffset]);
+ }
+}
+
void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
uint64_t Offset,
const BinaryFunction *Function,
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 88562a60dd50da..400394f2c83a3c 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -491,11 +491,27 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
// Offset of the instruction in function.
uint64_t Offset = 0;
+ auto printConstantIslandInRange = [&](uint64_t Start, uint64_t End) { // Print the first constant island starting in [Start, End), if any.
+ // Callers pass a half-open [Start, End) offset range, not an (offset, size) pair.
+ std::optional<uint64_t> IslandOffset = getIslandInRange(Start, End);
+
+ if (!IslandOffset) // No island starts inside the range; nothing to print.
+ return;
+
+ const size_t IslandSize = getSizeOfDataInCodeAt(*IslandOffset);
+ BC.printData(OS, BC.extractData(getAddress() + *IslandOffset, IslandSize), // extractData takes an absolute address,
+ *IslandOffset); // while printData labels each entry with the function-relative offset.
+ };
+
if (BasicBlocks.empty() && !Instructions.empty()) {
// Print before CFG was built.
+ uint64_t PrevOffset = 0;
for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
Offset = II.first;
+ // Print any constant islands in between the instructions.
+ printConstantIslandInRange(PrevOffset, Offset);
+
// Print label if exists at this offset.
auto LI = Labels.find(Offset);
if (LI != Labels.end()) {
@@ -506,7 +522,12 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
}
BC.printInstruction(OS, II.second, Offset, this);
+
+ PrevOffset = Offset;
}
+
+ // Print any data at the end of the function.
+ printConstantIslandInRange(PrevOffset, getMaxSize());
}
StringRef SplitPointMsg = "";
@@ -1048,6 +1069,18 @@ size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
return getSize() - Offset;
}
+std::optional<uint64_t>
+BinaryFunction::getIslandInRange(uint64_t StartOffset, uint64_t EndOffset) const { // First island offset in [StartOffset, EndOffset), or std::nullopt.
+ if (!Islands) // This function has no constant island info.
+ return std::nullopt;
+
+ auto Iter = llvm::lower_bound(Islands->DataOffsets, StartOffset); // First recorded data offset >= StartOffset (assumes DataOffsets is kept sorted).
+ if (Iter != Islands->DataOffsets.end() && *Iter < EndOffset) // Half-open range: EndOffset itself is excluded.
+ return *Iter;
+
+ return std::nullopt;
+}
+
bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
ArrayRef<uint8_t> FunctionData = *getData();
uint64_t EndOfCode = getSize();
diff --git a/bolt/test/AArch64/data-in-code.s b/bolt/test/AArch64/data-in-code.s
new file mode 100644
index 00000000000000..8d3179a0c33508
--- /dev/null
+++ b/bolt/test/AArch64/data-in-code.s
@@ -0,0 +1,31 @@
+## Check that llvm-bolt prints data embedded in code.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -fno-PIC -no-pie %t.o -o %t.exe -nostdlib \
+# RUN: -fuse-ld=lld -Wl,-q
+
+## Check disassembly of BOLT input.
+# RUN: llvm-objdump %t.exe -d | FileCheck %s
+
+# RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm | FileCheck %s
+
+.text
+.balign 4
+
+.global _start
+.type _start, %function
+_start:
+ mov x0, #0x0
+ .word 0x4f82e010
+ ret
+ .byte 0x0, 0xff, 0x42
+# CHECK-LABEL: _start
+# CHECK: mov x0, #0x0
+# CHECK-NEXT: .word 0x4f82e010
+# CHECK-NEXT: ret
+# CHECK-NEXT: .short 0xff00
+# CHECK-NEXT: .byte 0x42
+.size _start, .-_start
+
+## Force relocation mode.
+ .reloc 0, R_AARCH64_NONE
``````````
</details>
https://github.com/llvm/llvm-project/pull/125961
More information about the llvm-commits
mailing list