[llvm] [XCOFF] Use RLDs to print branches even without -r (PR #74342)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 4 09:02:05 PST 2023
https://github.com/stephenpeckham created https://github.com/llvm/llvm-project/pull/74342
This presents misleading and confusing output. If you have a function defined at the beginning of an XCOFF object file, and you have a function call to an external function, the function call disassembles as a branch to the local function. That is,
`void f() { f(); g();}`
disassembles as
>00000000 <.f>:
0: 7c 08 02 a6 mflr 0
4: 94 21 ff c0 stwu 1, -64(1)
8: 90 01 00 48 stw 0, 72(1)
c: 4b ff ff f5 bl 0x0 <.f>
10: 4b ff ff f1 bl 0x0 <.f>
With this PR, the second call will display:
`10: 4b ff ff f1 bl 0x0 <.g> `
Using -r can help, but you still get the confusing output:
>10: 4b ff ff f1 bl 0x0 <.f>
00000010: R_RBR .g
>From 4d255c7164b92e6dbfa34226d31055f9438855f0 Mon Sep 17 00:00:00 2001
From: Stephen Peckham <speckham at us.ibm.com>
Date: Mon, 4 Dec 2023 11:41:32 -0500
Subject: [PATCH] [XCOFF] Use RLDs to print branches even without -r
This presents misleading and confusing output.
---
.../XCOFF/disassemble-symbolize-operands.ll | 2 +-
.../XCOFF/disassemble-symbolize-operands2.ll | 62 +++++++++
llvm/tools/llvm-objdump/llvm-objdump.cpp | 119 +++++++++++-------
3 files changed, 139 insertions(+), 44 deletions(-)
create mode 100644 llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands2.ll
diff --git a/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll b/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll
index adedb6b7a5abf..2b4d6806292ce 100644
--- a/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll
+++ b/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll
@@ -21,7 +21,7 @@
; CHECK-NEXT: 68: cmplwi 3, 11
; CHECK-NEXT: 6c: bt 0, 0x60 <L2>
; CHECK-NEXT: 70: mr 31, 3
-; CHECK-NEXT: 74: bl 0x0 <.internal>
+; CHECK-NEXT: 74: bl 0x0 <.extern>
; CHECK-NEXT: 78: nop
; CHECK-NEXT: 7c: mr 3, 31
; CHECK-NEXT: 80: b 0x60 <L2>
diff --git a/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands2.ll b/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands2.ll
new file mode 100644
index 0000000000000..48ce2e7cc5514
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands2.ll
@@ -0,0 +1,62 @@
+; RUN: llc -mtriple=powerpc-ibm-aix-xcoff %s -filetype=obj -o %t
+; RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn \
+; RUN: | FileCheck %s
+
+; CHECK-LABEL: <.a>:
+;; No <L0> should appear
+; CHECK-NEXT: 0: mflr 0
+; CHECK-NEXT: 4: stwu 1, -64(1)
+; CHECK-NEXT: 8: lwz 3, 0(2)
+; CHECK-NEXT: c: stw 0, 72(1)
+; CHECK-NEXT: 10: lwz 3, 0(3)
+; CHECK-NEXT: 14: bl 0x4c <.b>
+; CHECK-NEXT: 18: nop
+; CHECK-NEXT: 1c: li 3, 1
+; CHECK-NEXT: 20: bl 0x0 <.c>
+
+; CHECK-LABEL: <.b>:
+; CHECK-NEXT: 4c: mflr 0
+; CHECK-NEXT: 50: stwu 1, -64(1)
+; CHECK-NEXT: 54: cmplwi 3, 1
+; CHECK-NEXT: 58: stw 0, 72(1)
+; CHECK-NEXT: 5c: stw 3, 60(1)
+; CHECK-NEXT: 60: bf 2, 0x6c <L0>
+; CHECK-NEXT: 64: bl 0x0 <.a>
+; CHECK-NEXT: 68: nop
+; CHECK-NEXT:<L0>:
+; CHECK-NEXT: 6c: li 3, 2
+; CHECK-NEXT: 70: bl 0x0 <.c>
+
+target triple = "powerpc-ibm-aix7.2.0.0"
+
+ at var = external global i32, align 4
+
+; Function Attrs: noinline nounwind optnone
+define i32 @a() {
+entry:
+ %0 = load i32, ptr @var, align 4
+ %call = call i32 @b(i32 noundef %0)
+ %call1 = call i32 @c(i32 noundef 1)
+ ret i32 %call1
+}
+
+; Function Attrs: noinline nounwind optnone
+define i32 @b(i32 noundef %x) {
+entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, ptr %x.addr, align 4
+ %0 = load i32, ptr %x.addr, align 4
+ %cmp = icmp eq i32 %0, 1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 @a()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %call1 = call i32 @c(i32 noundef 2)
+ ret i32 %call1
+}
+
+declare i32 @c(i32 noundef)
+
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 2b7dee7bf46b9..8645625a367f7 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1292,6 +1292,8 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
// So far only supports PowerPC and X86.
if (!STI->getTargetTriple().isPPC() && !STI->getTargetTriple().isX86())
return;
+ const bool isXCOFF = STI->getTargetTriple().isOSBinFormatXCOFF();
+ const bool isPPC = STI->getTargetTriple().isPPC();
if (MIA)
MIA->resetState();
@@ -1312,18 +1314,22 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
Size = std::min<uint64_t>(ThisBytes.size(),
DisAsm->suggestBytesToSkip(ThisBytes, Index));
- if (Disassembled && MIA) {
- uint64_t Target;
- bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
- // On PowerPC, if the address of a branch is the same as the target, it
- // means that it's a function call. Do not mark the label for this case.
- if (TargetKnown && (Target >= Start && Target < End) &&
- !Labels.count(Target) &&
- !(STI->getTargetTriple().isPPC() && Target == Index))
- Labels[Target] = ("L" + Twine(LabelCount++)).str();
- MIA->updateState(Inst, Index);
- } else if (!Disassembled && MIA) {
- MIA->resetState();
+ if (MIA) {
+ if (Disassembled) {
+ uint64_t Target;
+ bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
+ if (TargetKnown && (Target >= Start && Target < End) &&
+ !Labels.count(Target)) {
+ // On PowerPC and AIX, a function call is encoded as a branch to 0.
+ // On other PowerPC platforms (ELF), a function call is encoded as
+ // a branch to self. Do not add a label for these cases.
+ if (!(isPPC && ((Target == 0 && isXCOFF) ||
+ (Target == Index && !isXCOFF))))
+ Labels[Target] = ("L" + Twine(LabelCount++)).str();
+ }
+ MIA->updateState(Inst, Index);
+ } else
+ MIA->resetState();
}
Index += Size;
}
@@ -1487,7 +1493,7 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
}
std::map<SectionRef, std::vector<RelocationRef>> RelocMap;
- if (InlineRelocs)
+ if (InlineRelocs || Obj.isXCOFF())
RelocMap = getRelocsMap(Obj);
bool Is64Bits = Obj.getBytesInAddress() > 4;
@@ -1979,6 +1985,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
DT->InstrAnalysis->resetState();
while (Index < End) {
+ uint64_t RelOffset;
+
// ARM and AArch64 ELF binaries can interleave data and text in the
// same section. We rely on the markers introduced to understand what
// we need to dump. If the data marker is within a function, it is
@@ -2013,6 +2021,29 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
}
}
+ auto findRel = [&]() {
+ // Hexagon handles relocs in pretty printer
+ if (Obj.getArch() == Triple::hexagon)
+ return false;
+ while (RelCur != RelEnd) {
+ RelOffset = RelCur->getOffset() - RelAdjustment;
+ // If this relocation is hidden, skip it.
+ if (getHidden(*RelCur) || SectionAddr + RelOffset < StartAddress) {
+ ++RelCur;
+ continue;
+ }
+
+ // Stop when RelCur's offset is past the disassembled
+ // instruction/data.
+ if (RelOffset >= Index + Size)
+ return false;
+ if (RelOffset >= Index)
+ return true;
+ ++RelCur;
+ }
+ return false;
+ };
+
if (DumpARMELFData) {
Size = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
MappingSymbols, *DT->SubtargetInfo, FOS);
@@ -2081,17 +2112,19 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
DT->InstPrinter->setCommentStream(llvm::nulls());
- // If disassembly has failed, avoid analysing invalid/incomplete
- // instruction information. Otherwise, try to resolve the target
- // address (jump target or memory operand address) and print it on the
+ // If disassembly succeeds, we try to resolve the target address
+ // (jump target or memory operand address) and print it to the
// right of the instruction.
+ //
+ // Otherwise, we don't print anything else so that we avoid
+ // analyzing invalid or incomplete instruction information.
if (Disassembled && DT->InstrAnalysis) {
- // Branch targets are printed just after the instructions.
llvm::raw_ostream *TargetOS = &FOS;
uint64_t Target;
bool PrintTarget = DT->InstrAnalysis->evaluateBranch(
Inst, SectionAddr + Index, Size, Target);
- if (!PrintTarget)
+
+ if (!PrintTarget) {
if (std::optional<uint64_t> MaybeTarget =
DT->InstrAnalysis->evaluateMemoryOperandAddress(
Inst, DT->SubtargetInfo.get(), SectionAddr + Index,
@@ -2105,6 +2138,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
*TargetOS << "0x" << Twine::utohexstr(Target);
}
}
+ }
+
if (PrintTarget) {
// In a relocatable object, the target's section must reside in
// the same section as the call instruction or it is accessed
@@ -2114,7 +2149,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
// In that case, locate the section(s) containing the target
// address and find the symbol in one of those, if possible.
//
- // N.B. We don't walk the relocations in the relocatable case yet.
+ // N.B. Except for XCOFF, we don't walk the relocations in the
+ // relocatable case yet.
std::vector<const SectionSymbolsTy *> TargetSectionSymbols;
if (!Obj.isRelocatableObject()) {
auto It = llvm::partition_point(
@@ -2160,9 +2196,11 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
break;
}
+ // Branch targets are printed just after the instructions.
// Print the labels corresponding to the target if there's any.
bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target);
bool LabelAvailable = AllLabels.count(Target);
+
if (TargetSym != nullptr) {
uint64_t TargetAddress = TargetSym->Addr;
uint64_t Disp = Target - TargetAddress;
@@ -2170,9 +2208,21 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
: TargetSym->Name.str();
*TargetOS << " <";
- if (!Disp) {
- // Always Print the binary symbol precisely corresponding to
- // the target address.
+ // On XCOFF, we use relocations, even without -r, so we
+ // can print the correct name for an extern function call.
+ if (Obj.isXCOFF() && findRel()) {
+ SmallString<32> Val;
+
+ // If we have a valid relocation, try to print the
+ // corresponding symbol name. Multiple relocations on the
+ // same instruction are not handled.
+ if (!getRelocationValueString(*RelCur, Val))
+ *TargetOS << Val;
+ else
+ *TargetOS << TargetName;
+ if (Disp)
+ *TargetOS << "+0x" << Twine::utohexstr(Disp);
+ } else if (!Disp) {
*TargetOS << TargetName;
} else if (BBAddrMapLabelAvailable) {
*TargetOS << BBAddrMapLabels[Target].front();
@@ -2209,36 +2259,19 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
if (BTF)
printBTFRelocation(FOS, *BTF, {Index, Section.getIndex()}, LVP);
- // Hexagon does this in pretty printer
- if (Obj.getArch() != Triple::hexagon) {
- // Print relocation for instruction and data.
- while (RelCur != RelEnd) {
- uint64_t Offset = RelCur->getOffset() - RelAdjustment;
- // If this relocation is hidden, skip it.
- if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
- ++RelCur;
- continue;
- }
-
- // Stop when RelCur's offset is past the disassembled
- // instruction/data. Note that it's possible the disassembled data
- // is not the complete data: we might see the relocation printed in
- // the middle of the data, but this matches the binutils objdump
- // output.
- if (Offset >= Index + Size)
- break;
-
+ if (InlineRelocs) {
+ while (findRel()) {
// When --adjust-vma is used, update the address printed.
if (RelCur->getSymbol() != Obj.symbol_end()) {
Expected<section_iterator> SymSI =
RelCur->getSymbol()->getSection();
if (SymSI && *SymSI != Obj.section_end() &&
shouldAdjustVA(**SymSI))
- Offset += AdjustVMA;
+ RelOffset += AdjustVMA;
}
printRelocation(FOS, Obj.getFileName(), *RelCur,
- SectionAddr + Offset, Is64Bits);
+ SectionAddr + RelOffset, Is64Bits);
LVP.printAfterOtherLine(FOS, true);
++RelCur;
}
More information about the llvm-commits
mailing list