[llvm] 480a16d - [MC] Changes to help improve target specific symbol disassembly
Scott Linder via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 12 12:53:40 PDT 2020
Author: Ronak Chauhan
Date: 2020-06-12T15:51:37-04:00
New Revision: 480a16d5c8092f4eac181fb669c7ac05dc446a00
URL: https://github.com/llvm/llvm-project/commit/480a16d5c8092f4eac181fb669c7ac05dc446a00
DIFF: https://github.com/llvm/llvm-project/commit/480a16d5c8092f4eac181fb669c7ac05dc446a00.diff
LOG: [MC] Changes to help improve target specific symbol disassembly
Summary:
This commit slightly modifies the MCDisassembler, and llvm-objdump to
allow targets to also decode entire symbols.
WebAssembly uses the onSymbolStart hook it to decode preludes.
WebAssembly partially disassembles the symbol in its target specific
way; and then falls back to the normal flow of llvm-objdump.
AMDGPU needs it to decode kernel descriptors entirely, and move to the
next symbol.
This commit is to split the above task into 2.
- Changes to llvm-objdump and MC-layer without breaking WebAssembly code
[ this commit ]
- AMDGPU's implementation of onSymbolStart that decodes kernel
descriptors. [ https://reviews.llvm.org/D80713 ]
Reviewers: scott.linder, t-tye, sunfish, arsenm, jhenderson, MaskRay, aardappel
Reviewed By: scott.linder, jhenderson, aardappel
Subscribers: bcain, dschuff, wdng, tpr, sbc100, jgravelle-google, hiraditya, aheejin, MaskRay, rupprecht, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80512
Added:
Modified:
llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
llvm/tools/llvm-objdump/llvm-objdump.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
index cddf325994f2..deff52989832 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
@@ -127,8 +127,13 @@ class MCDisassembler {
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const = 0;
- /// May parse any prelude that precedes instructions after the start of a
- /// symbol. Needed for some targets, e.g. WebAssembly.
+ /// Used to perform separate target specific disassembly for a particular
+ /// symbol. May parse any prelude that precedes instructions after the
+ /// start of a symbol, or the entire symbol.
+ /// This is used for example by WebAssembly to decode preludes.
+ ///
+ /// Base implementation returns None. So all targets by default ignore to
+ /// treat symbols separately.
///
/// \param Name - The name of the symbol.
/// \param Size - The number of bytes consumed.
@@ -136,11 +141,27 @@ class MCDisassembler {
/// byte of the symbol.
/// \param Bytes - A reference to the actual bytes at the symbol location.
/// \param CStream - The stream to print comments and annotations on.
- /// \return - MCDisassembler::Success if the bytes are valid,
- /// MCDisassembler::Fail if the bytes were invalid.
- virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &CStream) const;
+ /// \return - MCDisassembler::Success if bytes are decoded
+ /// successfully. Size must hold the number of bytes that
+ /// were decoded.
+ /// - MCDisassembler::Fail if the bytes are invalid. Size
+ /// must hold the number of bytes that were decoded before
+ /// failing. The target must print nothing. This can be
+ /// done by buffering the output if needed.
+ /// - None if the target doesn't want to handle the symbol
+ /// separately. Value of Size is ignored in this case.
+ virtual Optional<DecodeStatus> onSymbolStart(StringRef Name, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CStream) const;
+ // TODO:
+ // Implement similar hooks that can be used at other points during
+ // disassembly. Something along the following lines:
+ // - onBeforeInstructionDecode()
+ // - onAfterInstructionDecode()
+ // - onSymbolEnd()
+ // It should help move much of the target specific code from llvm-objdump to
+ // respective target disassemblers.
private:
MCContext &Ctx;
diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
index 40ffd1fc5b73..cf3db23e004a 100644
--- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -16,12 +16,11 @@ using namespace llvm;
MCDisassembler::~MCDisassembler() = default;
-MCDisassembler::DecodeStatus
+Optional<MCDisassembler::DecodeStatus>
MCDisassembler::onSymbolStart(StringRef Name, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const {
- Size = 0;
- return MCDisassembler::Success;
+ return None;
}
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index a8cb5d18537c..8f14de6af6fe 100644
--- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -46,9 +46,10 @@ class WebAssemblyDisassembler final : public MCDisassembler {
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const override;
- DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &CStream) const override;
+ Optional<DecodeStatus> onSymbolStart(StringRef Name, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CStream) const override;
public:
WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
@@ -120,7 +121,7 @@ bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
return true;
}
-MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
+Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const {
Size = 0;
@@ -128,21 +129,21 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
// Start of a code section: we're parsing only the function count.
int64_t FunctionCount;
if (!nextLEB(FunctionCount, Bytes, Size, false))
- return MCDisassembler::Fail;
+ return None;
outs() << " # " << FunctionCount << " functions in section.";
} else {
// Parse the start of a single function.
int64_t BodySize, LocalEntryCount;
if (!nextLEB(BodySize, Bytes, Size, false) ||
!nextLEB(LocalEntryCount, Bytes, Size, false))
- return MCDisassembler::Fail;
+ return None;
if (LocalEntryCount) {
outs() << " .local ";
for (int64_t I = 0; I < LocalEntryCount; I++) {
int64_t Count, Type;
if (!nextLEB(Count, Bytes, Size, false) ||
!nextLEB(Type, Bytes, Size, false))
- return MCDisassembler::Fail;
+ return None;
for (int64_t J = 0; J < Count; J++) {
if (I || J)
outs() << ", ";
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index d130e5ca7ebc..c27e487ea6be 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1429,10 +1429,37 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
continue;
}
- // Some targets (like WebAssembly) have a special prelude at the start
- // of each symbol.
- DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start),
- SectionAddr + Start, CommentStream);
+ auto Status = DisAsm->onSymbolStart(SymbolName, Size,
+ Bytes.slice(Start, End - Start),
+ SectionAddr + Start, CommentStream);
+ // To have round trippable disassembly, we fall back to decoding the
+ // remaining bytes as instructions.
+ //
+ // If there is a failure, we disassemble the failed region as bytes before
+ // falling back. The target is expected to print nothing in this case.
+ //
+ // If there is Success or SoftFail i.e no 'real' failure, we go ahead by
+ // Size bytes before falling back.
+ // So if the entire symbol is 'eaten' by the target:
+ // Start += Size // Now Start = End and we will never decode as
+ // // instructions
+ //
+ // Right now, most targets return None i.e ignore to treat a symbol
+ // separately. But WebAssembly decodes preludes for some symbols.
+ //
+ if (Status.hasValue()) {
+ if (Status.getValue() == MCDisassembler::Fail) {
+ outs() << "// Error in decoding " << SymbolName
+ << " : Decoding failed region as bytes.\n";
+ for (uint64_t I = 0; I < Size; ++I) {
+ outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
+ << "\n";
+ }
+ }
+ } else {
+ Size = 0;
+ }
+
Start += Size;
Index = Start;
More information about the llvm-commits
mailing list