[llvm] 480a16d - [MC] Changes to help improve target specific symbol disassembly

Scott Linder via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 12 12:53:40 PDT 2020


Author: Ronak Chauhan
Date: 2020-06-12T15:51:37-04:00
New Revision: 480a16d5c8092f4eac181fb669c7ac05dc446a00

URL: https://github.com/llvm/llvm-project/commit/480a16d5c8092f4eac181fb669c7ac05dc446a00
DIFF: https://github.com/llvm/llvm-project/commit/480a16d5c8092f4eac181fb669c7ac05dc446a00.diff

LOG: [MC] Changes to help improve target specific symbol disassembly

Summary:
This commit slightly modifies the MCDisassembler, and llvm-objdump to
allow targets to also decode entire symbols.

WebAssembly uses the onSymbolStart hook it to decode preludes.
WebAssembly partially disassembles the symbol in its target specific
way; and then falls back to the normal flow of llvm-objdump.

AMDGPU needs it to decode kernel descriptors entirely, and move to the
next symbol.

This commit is to split the above task into 2.
- Changes to llvm-objdump and MC-layer without breaking WebAssembly code
  [ this commit ]
- AMDGPU's implementation of onSymbolStart that decodes kernel
  descriptors. [ https://reviews.llvm.org/D80713 ]

Reviewers: scott.linder, t-tye, sunfish, arsenm, jhenderson, MaskRay, aardappel

Reviewed By: scott.linder, jhenderson, aardappel

Subscribers: bcain, dschuff, wdng, tpr, sbc100, jgravelle-google, hiraditya, aheejin, MaskRay, rupprecht, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80512

Added: 
    

Modified: 
    llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
    llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
    llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
    llvm/tools/llvm-objdump/llvm-objdump.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
index cddf325994f2..deff52989832 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
@@ -127,8 +127,13 @@ class MCDisassembler {
                                       ArrayRef<uint8_t> Bytes, uint64_t Address,
                                       raw_ostream &CStream) const = 0;
 
-  /// May parse any prelude that precedes instructions after the start of a
-  /// symbol. Needed for some targets, e.g. WebAssembly.
+  /// Used to perform separate target specific disassembly for a particular
+  /// symbol. May parse any prelude that precedes instructions after the
+  /// start of a symbol, or the entire symbol.
+  /// This is used for example by WebAssembly to decode preludes.
+  ///
+  /// Base implementation returns None. So all targets by default ignore to
+  /// treat symbols separately.
   ///
   /// \param Name     - The name of the symbol.
   /// \param Size     - The number of bytes consumed.
@@ -136,11 +141,27 @@ class MCDisassembler {
   ///                   byte of the symbol.
   /// \param Bytes    - A reference to the actual bytes at the symbol location.
   /// \param CStream  - The stream to print comments and annotations on.
-  /// \return         - MCDisassembler::Success if the bytes are valid,
-  ///                   MCDisassembler::Fail if the bytes were invalid.
-  virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
-                                     ArrayRef<uint8_t> Bytes, uint64_t Address,
-                                     raw_ostream &CStream) const;
+  /// \return         - MCDisassembler::Success if bytes are decoded
+  ///                   successfully. Size must hold the number of bytes that
+  ///                   were decoded.
+  ///                 - MCDisassembler::Fail if the bytes are invalid. Size
+  ///                   must hold the number of bytes that were decoded before
+  ///                   failing. The target must print nothing. This can be
+  ///                   done by buffering the output if needed.
+  ///                 - None if the target doesn't want to handle the symbol
+  ///                   separately. Value of Size is ignored in this case.
+  virtual Optional<DecodeStatus> onSymbolStart(StringRef Name, uint64_t &Size,
+                                               ArrayRef<uint8_t> Bytes,
+                                               uint64_t Address,
+                                               raw_ostream &CStream) const;
+  // TODO:
+  // Implement similar hooks that can be used at other points during
+  // disassembly. Something along the following lines:
+  // - onBeforeInstructionDecode()
+  // - onAfterInstructionDecode()
+  // - onSymbolEnd()
+  // It should help move much of the target specific code from llvm-objdump to
+  // respective target disassemblers.
 
 private:
   MCContext &Ctx;

diff  --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
index 40ffd1fc5b73..cf3db23e004a 100644
--- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -16,12 +16,11 @@ using namespace llvm;
 
 MCDisassembler::~MCDisassembler() = default;
 
-MCDisassembler::DecodeStatus
+Optional<MCDisassembler::DecodeStatus>
 MCDisassembler::onSymbolStart(StringRef Name, uint64_t &Size,
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
                               raw_ostream &CStream) const {
-  Size = 0;
-  return MCDisassembler::Success;
+  return None;
 }
 
 bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,

diff  --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index a8cb5d18537c..8f14de6af6fe 100644
--- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -46,9 +46,10 @@ class WebAssemblyDisassembler final : public MCDisassembler {
   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
                               raw_ostream &CStream) const override;
-  DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
-                             ArrayRef<uint8_t> Bytes, uint64_t Address,
-                             raw_ostream &CStream) const override;
+  Optional<DecodeStatus> onSymbolStart(StringRef Name, uint64_t &Size,
+                                       ArrayRef<uint8_t> Bytes,
+                                       uint64_t Address,
+                                       raw_ostream &CStream) const override;
 
 public:
   WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
@@ -120,7 +121,7 @@ bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
   return true;
 }
 
-MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
+Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
     StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
     raw_ostream &CStream) const {
   Size = 0;
@@ -128,21 +129,21 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
     // Start of a code section: we're parsing only the function count.
     int64_t FunctionCount;
     if (!nextLEB(FunctionCount, Bytes, Size, false))
-      return MCDisassembler::Fail;
+      return None;
     outs() << "        # " << FunctionCount << " functions in section.";
   } else {
     // Parse the start of a single function.
     int64_t BodySize, LocalEntryCount;
     if (!nextLEB(BodySize, Bytes, Size, false) ||
         !nextLEB(LocalEntryCount, Bytes, Size, false))
-      return MCDisassembler::Fail;
+      return None;
     if (LocalEntryCount) {
       outs() << "        .local ";
       for (int64_t I = 0; I < LocalEntryCount; I++) {
         int64_t Count, Type;
         if (!nextLEB(Count, Bytes, Size, false) ||
             !nextLEB(Type, Bytes, Size, false))
-          return MCDisassembler::Fail;
+          return None;
         for (int64_t J = 0; J < Count; J++) {
           if (I || J)
             outs() << ", ";

diff  --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index d130e5ca7ebc..c27e487ea6be 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1429,10 +1429,37 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
         continue;
       }
 
-      // Some targets (like WebAssembly) have a special prelude at the start
-      // of each symbol.
-      DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start),
-                            SectionAddr + Start, CommentStream);
+      auto Status = DisAsm->onSymbolStart(SymbolName, Size,
+                                          Bytes.slice(Start, End - Start),
+                                          SectionAddr + Start, CommentStream);
+      // To have round trippable disassembly, we fall back to decoding the
+      // remaining bytes as instructions.
+      //
+      // If there is a failure, we disassemble the failed region as bytes before
+      // falling back. The target is expected to print nothing in this case.
+      //
+      // If there is Success or SoftFail i.e no 'real' failure, we go ahead by
+      // Size bytes before falling back.
+      // So if the entire symbol is 'eaten' by the target:
+      //   Start += Size  // Now Start = End and we will never decode as
+      //                  // instructions
+      //
+      // Right now, most targets return None i.e ignore to treat a symbol
+      // separately. But WebAssembly decodes preludes for some symbols.
+      //
+      if (Status.hasValue()) {
+        if (Status.getValue() == MCDisassembler::Fail) {
+          outs() << "// Error in decoding " << SymbolName
+                 << " : Decoding failed region as bytes.\n";
+          for (uint64_t I = 0; I < Size; ++I) {
+            outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
+                   << "\n";
+          }
+        }
+      } else {
+        Size = 0;
+      }
+
       Start += Size;
 
       Index = Start;


        


More information about the llvm-commits mailing list