[llvm] 8710eff - [MC][AMDGPU][llvm-objdump] Synthesized local labels in disassembly
Tim Renouf via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 26 05:57:02 PDT 2021
Author: Tim Renouf
Date: 2021-04-26T13:56:36+01:00
New Revision: 8710eff6c3bab333bd270573fc349938b9a69dc0
URL: https://github.com/llvm/llvm-project/commit/8710eff6c3bab333bd270573fc349938b9a69dc0
DIFF: https://github.com/llvm/llvm-project/commit/8710eff6c3bab333bd270573fc349938b9a69dc0.diff
LOG: [MC][AMDGPU][llvm-objdump] Synthesized local labels in disassembly
1. Add an accessor function to MCSymbolizer to retrieve addresses
referenced by a symbolizable operand, but not resolved to a symbol.
That way, the caller can synthesize labels at those addresses and
then retry disassembling the section.
2. Implement that in AMDGPU -- a failed symbol lookup results in the
address being added to a vector returned by the new function.
3. Use that in llvm-objdump when using MCSymbolizer (which only happens
on AMDGPU) and SymbolizeOperands is on.
Differential Revision: https://reviews.llvm.org/D101145
Change-Id: I19087c3bbfece64bad5a56ee88bcc9110d83989e
Added:
llvm/test/tools/llvm-objdump/AMDGPU/elf-disassemble-symbolize-operands.yaml
Modified:
llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
llvm/tools/llvm-objdump/llvm-objdump.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
index b7ca83a5f16c..b966106007db 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
@@ -15,6 +15,7 @@
#ifndef LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
#define LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include <algorithm>
#include <cstdint>
@@ -75,6 +76,17 @@ class MCSymbolizer {
virtual void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
int64_t Value,
uint64_t Address) = 0;
+
+ /// Get the MCSymbolizer's list of addresses that were referenced by
+ /// symbolizable operands but not resolved to a symbol. The caller (some
+ /// code that is disassembling a section or other chunk of code) would
+ /// typically create a synthetic label at each address and add them to its
+ /// list of symbols in the section, before creating a new MCSymbolizer with
+ /// the enhanced symbol list and retrying disassembling the section.
+ /// The returned array is unordered and may have duplicates.
+ /// The returned ArrayRef is invalidated by any subsequent call on the
+ /// MCSymbolizer object, or by its destruction. Default: an empty list.
+ virtual ArrayRef<uint64_t> getReferencedAddresses() const { return {}; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 499c72409f54..7507f681678d 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1834,6 +1834,8 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
Inst.addOperand(MCOperand::createExpr(Add));
return true;
}
+ // Add to list of referenced addresses, so caller can synthesize a label.
+ ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 7c6d3afbe788..93e2f636bdab 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -183,6 +183,7 @@ class AMDGPUDisassembler : public MCDisassembler {
class AMDGPUSymbolizer : public MCSymbolizer {
private:
void *DisInfo;
+ std::vector<uint64_t> ReferencedAddresses;
public:
AMDGPUSymbolizer(MCContext &Ctx, std::unique_ptr<MCRelocationInfo> &&RelInfo,
@@ -197,6 +198,10 @@ class AMDGPUSymbolizer : public MCSymbolizer {
void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
int64_t Value,
uint64_t Address) override;
+
+  /// Return a non-owning view of the addresses recorded in
+  /// ReferencedAddresses. The view refers to this object's own storage.
+  ArrayRef<uint64_t> getReferencedAddresses() const override {
+    return ArrayRef<uint64_t>(ReferencedAddresses);
+  }
};
} // end namespace llvm
diff --git a/llvm/test/tools/llvm-objdump/AMDGPU/elf-disassemble-symbolize-operands.yaml b/llvm/test/tools/llvm-objdump/AMDGPU/elf-disassemble-symbolize-operands.yaml
new file mode 100644
index 000000000000..563864633902
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/AMDGPU/elf-disassemble-symbolize-operands.yaml
@@ -0,0 +1,89 @@
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
+# RUN: FileCheck %s
+
+## Expect to find the branch labels.
+# CHECK: <break_cond_is_arg>:
+# CHECK: s_branch L1
+# CHECK: <L0>:
+# CHECK: s_cbranch_execz L2
+# CHECK: <L1>:
+# CHECK: s_branch L0
+# CHECK: <L2>:
+
+# I created this YAML starting with this LLVM IR:
+#
+# define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
+# entry:
+# br label %loop
+# loop:
+# %tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
+# %tmp23 = add nuw i32 %tmp23phi, 1
+# %tmp27 = icmp ult i32 %arg, %tmp23
+# br i1 %tmp27, label %then, label %endif
+# then: ; preds = %bb
+# call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
+# br label %endif
+# endif: ; preds = %bb28, %bb
+# br i1 %breakcond, label %loop, label %loopexit
+# loopexit:
+# ret void
+# }
+#
+# declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
+#
+# attributes #0 = { nounwind writeonly }
+#
+# I compiled it to a relocatable ELF:
+#
+# llc -march=amdgcn -mcpu=gfx1030 llvm/a.ll -filetype=obj -o a.elf
+#
+# then converted it to YAML:
+#
+# obj2yaml a.elf
+#
+# then manually removed the BB0_1 etc local symbols.
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_REL
+ Machine: EM_AMDGPU
+ Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1030 ]
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x4
+ Content: 00008CBF0000FDBB81020236810385BE800384BE8102847D6AC10689040082BF7E077E88058105817E047E8A080088BF0500887D7E060787070404886A3C87BEF7FF88BF000070E000000104F4FF82BF7E047E880000FDBB1E2080BE
+ - Name: .AMDGPU.config
+ Type: SHT_PROGBITS
+ AddressAlign: 0x1
+ Content: 48B80000000000004CB800000000000060B800000000000004000000000000000800000000000000
+ - Name: .note.GNU-stack
+ Type: SHT_PROGBITS
+ AddressAlign: 0x1
+ - Name: .note
+ Type: SHT_NOTE
+ AddressAlign: 0x4
+ Notes:
+ - Name: AMD
+ Desc: 616D6467636E2D756E6B6E6F776E2D6C696E75782D676E752D67667831303330
+ Type: NT_FREEBSD_PROCSTAT_GROUPS
+ - Type: SectionHeaderTable
+ Sections:
+ - Name: .strtab
+ - Name: .shstrtab
+ - Name: .text
+ - Name: .AMDGPU.config
+ - Name: .note.GNU-stack
+ - Name: .note
+ - Name: .symtab
+Symbols:
+ - Name: break_cond_is_arg
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ Size: 0x5C
+...
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 57aac91baaf9..4b2ca6560677 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -972,6 +972,62 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
}
}
+// Create an MCSymbolizer for the target and add it to the MCDisassembler.
+// This is currently only used on AMDGPU, and assumes the format of the
+// void * argument passed to AMDGPU's createMCSymbolizer.
+//
+// If no MCRelocationInfo can be created, nothing is installed. When
+// SymbolizeOperands is on, the section is first disassembled once with the
+// output discarded; every address the symbolizer reports as referenced but
+// unresolved gets a synthesized label "L<n>" (numbered in ascending address
+// order) appended to Symbols, Symbols is re-sorted, and a fresh symbolizer
+// built from the enlarged list replaces the first one.
+//
+// NOTE(review): each label name is allocated with `new` and only its raw
+// pointer is stored in SynthesizedLabelNames. Nothing visible here deletes
+// those strings -- confirm the caller frees them once Symbols is no longer
+// used, or consider changing the element type to
+// std::unique_ptr<std::string>.
+static void addSymbolizer(MCContext &Ctx, const Target *Target,
+                          StringRef TripleName, MCDisassembler *DisAsm,
+                          uint64_t SectionAddr, ArrayRef<uint8_t> Bytes,
+                          SectionSymbolsTy &Symbols,
+                          std::vector<std::string *> &SynthesizedLabelNames) {
+  std::unique_ptr<MCRelocationInfo> RelInfo(
+      Target->createMCRelocationInfo(TripleName, Ctx));
+  if (!RelInfo)
+    return;
+  std::unique_ptr<MCSymbolizer> Symbolizer(Target->createMCSymbolizer(
+      TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
+  // Keep a non-owning pointer: ownership moves into DisAsm on the next line,
+  // but the collected addresses are still read from this symbolizer below.
+  MCSymbolizer *SymbolizerPtr = Symbolizer.get();
+  DisAsm->setSymbolizer(std::move(Symbolizer));
+
+  if (!SymbolizeOperands)
+    return;
+
+  // Synthesize labels referenced by branch instructions by
+  // disassembling, discarding the output, and collecting the referenced
+  // addresses from the symbolizer.
+  // The loop tests `<` rather than `!=` so that a reported instruction size
+  // running past the end of the section ends the scan instead of slicing
+  // Bytes out of range.
+  for (size_t Index = 0; Index < Bytes.size();) {
+    MCInst Inst;
+    uint64_t Size = 0;
+    DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), SectionAddr + Index,
+                           nulls());
+    // Always make progress, even when no size was reported for this offset.
+    if (Size == 0)
+      Size = 1;
+    Index += Size;
+  }
+
+  // Copy, sort and de-duplicate: the symbolizer's list is unordered and may
+  // contain repeats, and the ArrayRef does not outlive the symbolizer.
+  ArrayRef<uint64_t> LabelAddrsRef = SymbolizerPtr->getReferencedAddresses();
+  std::vector<uint64_t> LabelAddrs(LabelAddrsRef.begin(), LabelAddrsRef.end());
+  llvm::sort(LabelAddrs);
+  LabelAddrs.erase(std::unique(LabelAddrs.begin(), LabelAddrs.end()),
+                   LabelAddrs.end());
+
+  // Add the labels, named L0, L1, ... in ascending address order.
+  for (unsigned LabelNum = 0; LabelNum != LabelAddrs.size(); ++LabelNum) {
+    SynthesizedLabelNames.push_back(
+        new std::string((Twine("L") + Twine(LabelNum)).str()));
+    Symbols.push_back(SymbolInfoTy(
+        LabelAddrs[LabelNum], *SynthesizedLabelNames.back(), ELF::STT_NOTYPE));
+  }
+  llvm::stable_sort(Symbols);
+
+  // Recreate the symbolizer with the new symbols list.
+  RelInfo.reset(Target->createMCRelocationInfo(TripleName, Ctx));
+  Symbolizer.reset(Target->createMCSymbolizer(
+      TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
+  DisAsm->setSymbolizer(std::move(Symbolizer));
+}
+
static StringRef getSegmentName(const MachOObjectFile *MachO,
const SectionRef &Section) {
if (MachO) {
@@ -1134,16 +1190,14 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
llvm::sort(MappingSymbols);
+ ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
+ unwrapOrError(Section.getContents(), Obj->getFileName()));
+
+ std::vector<std::string *> SynthesizedLabelNames;
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// AMDGPU disassembler uses symbolizer for printing labels
- std::unique_ptr<MCRelocationInfo> RelInfo(
- TheTarget->createMCRelocationInfo(TripleName, Ctx));
- if (RelInfo) {
- std::unique_ptr<MCSymbolizer> Symbolizer(
- TheTarget->createMCSymbolizer(
- TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
- DisAsm->setSymbolizer(std::move(Symbolizer));
- }
+ addSymbolizer(Ctx, TheTarget, TripleName, DisAsm, SectionAddr, Bytes,
+ Symbols, SynthesizedLabelNames);
}
StringRef SegmentName = getSegmentName(MachO, Section);
@@ -1159,9 +1213,6 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
SmallString<40> Comments;
raw_svector_ostream CommentStream(Comments);
- ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
- unwrapOrError(Section.getContents(), Obj->getFileName()));
-
uint64_t VMAAdjustment = 0;
if (shouldAdjustVA(Section))
VMAAdjustment = AdjustVMA;
More information about the llvm-commits
mailing list