[llvm-commits] [llvm] r128308 - in /llvm/trunk: include/llvm-c/Disassembler.h lib/MC/MCDisassembler/Disassembler.cpp lib/MC/MCDisassembler/Disassembler.h

Kevin Enderby enderby at apple.com
Fri Mar 25 17:06:33 PDT 2011


Author: enderby
Date: Fri Mar 25 19:06:33 2011
New Revision: 128308

URL: http://llvm.org/viewvc/llvm-project?rev=128308&view=rev
Log:
Adding a C API to the disassembler for use by such tools as Darwin's otool(1).
This is a work in progress as the interface for producing symbolic operands is
not done.  But a hacked prototype using information from the object file's
relocation entries and replacing immediate operands with MCExpr's has been
shown to work with no changes to the instruction printer.  These APIs will be
moved into a dynamic library at some point.

Added:
    llvm/trunk/include/llvm-c/Disassembler.h
    llvm/trunk/lib/MC/MCDisassembler/Disassembler.cpp
    llvm/trunk/lib/MC/MCDisassembler/Disassembler.h

Added: llvm/trunk/include/llvm-c/Disassembler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm-c/Disassembler.h?rev=128308&view=auto
==============================================================================
--- llvm/trunk/include/llvm-c/Disassembler.h (added)
+++ llvm/trunk/include/llvm-c/Disassembler.h Fri Mar 25 19:06:33 2011
@@ -0,0 +1,106 @@
+/*===-- llvm-c/Disassembler.h - Disassembler Public C Interface ---*- C -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This header provides the public interface to a disassembler library.       *|
+|* LLVM provides an implementation of this interface.                         *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_DISASSEMBLER_H
+#define LLVM_C_DISASSEMBLER_H  1
+
+#include <stdint.h>
+#include <stddef.h>
+
+/**
+ * An opaque reference to a disassembler context.
+ */
+typedef void *LLVMDisasmContextRef;
+
+/**
+ * The type for the operand information call back function.  This is called to
+ * get the symbolic information for an operand of an instruction.  Typically
+ * this is from the relocation information, symbol table, etc.  That block of
+ * information is saved when the disassembler context is created and passed to
+ * the call back in the DisInfo parameter.  The instruction containing operand
+ * is at the PC parameter.  For some instruction sets, there can be more than
+ * one operand with symbolic information.  To determine the symbolic operand
+ * information for each operand, the bytes for the specific operand in the
+ * instruction are specified by the Offset parameter and its byte width is the
+ * Size parameter.  For instruction sets with fixed widths and one symbolic
+ * operand per instruction, the Offset parameter will be zero and Size parameter
+ * will be the instruction width.  The information is returned in TagBuf and is
+ * Triple specific with its specific information defined by the value of
+ * TagType for that Triple.  If symbolic information is returned the function
+ * returns 1 else it returns 0.
+ */
+typedef int (*LLVMOpInfoCallback)(void *DisInfo,
+                                  uint64_t PC,
+                                  uint64_t Offset,
+                                  uint64_t Size,
+                                  int TagType,
+                                  void *TagBuf);
+
+/**
+ * The type for the symbol lookup function.  This may be called by the
+ * disassembler for such things like adding a comment for a PC plus a constant
+ * offset load instruction to use a symbol name instead of a load address value.
+ * It is passed the block of information that is saved when the disassembler
+ * context is created and a value of a symbol to look up.  If no symbol is found
+ * NULL is to be returned.
+ */
+typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo,
+                                                uint64_t SymbolValue);
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+/**
+ * Create a disassembler for the TripleName.  Symbolic disassembly is supported
+ * by passing a block of information in the DisInfo parameter and specifying the
+ * TagType and call back functions as described above.  These can all be passed
+ * as NULL.  If successful this returns a disassembler context; if not it
+ * returns NULL.
+ */
+extern LLVMDisasmContextRef
+LLVMCreateDisasm(const char *TripleName,
+                 void *DisInfo,
+                 int TagType,
+                 LLVMOpInfoCallback GetOpInfo,
+                 LLVMSymbolLookupCallback SymbolLookUp);
+
+/**
+ * Dispose of a disassembler context.
+ */
+extern void
+LLVMDisasmDispose(LLVMDisasmContextRef DC);
+
+/**
+ * Disassemble a single instruction using the disassembler context specified in
+ * the parameter DC.  The bytes of the instruction are given in the parameter
+ * Bytes, and contains at least BytesSize number of bytes.  The instruction is
+ * at the address specified by the PC parameter.  If a valid instruction can be
+ * disassembled its string is returned indirectly in OutString, whose size
+ * is specified in the parameter OutStringSize.  This function returns the
+ * number of bytes in the instruction or zero if there was no valid instruction.
+ */
+extern size_t
+LLVMDisasmInstruction(LLVMDisasmContextRef DC,
+                      uint8_t *Bytes,
+                      uint64_t BytesSize,
+                      uint64_t PC,
+                      char *OutString,
+                      size_t OutStringSize);
+
+#ifdef __cplusplus
+}
+#endif /* defined(__cplusplus) */
+
+#endif /* !defined(LLVM_C_DISASSEMBLER_H) */

Added: llvm/trunk/lib/MC/MCDisassembler/Disassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCDisassembler/Disassembler.cpp?rev=128308&view=auto
==============================================================================
--- llvm/trunk/lib/MC/MCDisassembler/Disassembler.cpp (added)
+++ llvm/trunk/lib/MC/MCDisassembler/Disassembler.cpp Fri Mar 25 19:06:33 2011
@@ -0,0 +1,169 @@
+//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface -*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "Disassembler.h"
+#include <stdio.h>
+#include "llvm-c/Disassembler.h"
+
+#include <string>
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmInfo.h"  // FIXME.
+#include "llvm/Target/TargetMachine.h"  // FIXME.
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Support/MemoryObject.h"
+
+namespace llvm {
+class Target;
+} // namespace llvm
+using namespace llvm;
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//
+// LLVMCreateDisasm() creates a disassembler for the TripleName.  Symbolic
+// disassembly is supported by passing a block of information in the DisInfo
+// parameter and specifying the TagType and call back functions as described in
+// the header llvm-c/Disassembler.h .  The pointer to the block and the
+// functions can all be passed as NULL.  If successful this returns a
+// disassembler context; if not (or if TripleName is NULL) it returns NULL.
+//
+LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
+                                      int TagType, LLVMOpInfoCallback GetOpInfo,
+                                      LLVMSymbolLookupCallback SymbolLookUp) {
+  // A NULL triple cannot be turned into the std::string used below.
+  if (!TripleName)
+    return 0;
+
+  // Initialize targets and assembly printers/parsers.
+  llvm::InitializeAllTargetInfos();
+  // FIXME: We shouldn't need to initialize the Target(Machine)s.
+  llvm::InitializeAllTargets();
+  llvm::InitializeAllAsmPrinters();
+  llvm::InitializeAllAsmParsers();
+  llvm::InitializeAllDisassemblers();
+
+  // Get the target.  Failures return NULL instead of using assert(), which
+  // is compiled out of release builds and would lead to a null dereference.
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+  if (!TheTarget)
+    return 0;
+
+  // Get the assembler info; OwningPtr frees it if a later step fails.
+  OwningPtr<const MCAsmInfo> MAI(TheTarget->createAsmInfo(TripleName));
+  if (!MAI.get())
+    return 0;
+
+  // FIXME: We shouldn't need to do this (and link in codegen).
+  //        When we split this out, we should do it in a way that makes
+  //        it straightforward to switch subtargets on the fly.
+  std::string FeaturesStr; // Features to be passed to target/subtarget.
+  OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName,
+                                                             FeaturesStr));
+  if (!TM.get())
+    return 0;
+
+  // Set up disassembler.
+  OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler());
+  if (!DisAsm.get())
+    return 0;
+
+  // Set up the instruction printer.
+  OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
+                                *TM, MAI->getAssemblerDialect(), *MAI));
+  if (!IP.get())
+    return 0;
+
+  // Nothing below returns null on failure, so build the context last.
+  const TargetAsmInfo *tai = new TargetAsmInfo(*TM);
+  MCContext *Ctx = new MCContext(*MAI, tai);
+  return new LLVMDisasmContext(TripleName, DisInfo, TagType, GetOpInfo,
+                               SymbolLookUp, TheTarget, MAI.take(), TM.take(),
+                               tai, Ctx, DisAsm.take(), IP.take());
+}
+
+//
+// LLVMDisasmDispose() destroys the disassembler context referred to by DCR,
+// treating it as the LLVMDisasmContext that LLVMCreateDisasm() returned.
+//
+void LLVMDisasmDispose(LLVMDisasmContextRef DCR) {
+  delete static_cast<LLVMDisasmContext *>(DCR);
+}
+
+namespace {
+//
+// Presents the caller's Size bytes, based at BasePC, as a MemoryObject.
+//
+class DisasmMemoryObject : public MemoryObject {
+  uint8_t *Bytes;
+  uint64_t Size;
+  uint64_t BasePC;
+public:
+  DisasmMemoryObject(uint8_t *bytes, uint64_t size, uint64_t basePC)
+    : Bytes(bytes), Size(size), BasePC(basePC) {}
+  uint64_t getBase() const { return BasePC; }
+  uint64_t getExtent() const { return Size; }
+
+  // Returns 0 and stores the byte at Addr in *Byte, or -1 if out of range.
+  int readByte(uint64_t Addr, uint8_t *Byte) const {
+    const uint64_t Offset = Addr - BasePC;
+    if (Offset >= Size)
+      return -1;
+    *Byte = Bytes[Offset];
+    return 0;
+  }
+};
+} // namespace
+
+//
+// LLVMDisasmInstruction() disassembles a single instruction using the
+// disassembler context specified in the parameter DC.  The bytes of the
+// instruction are specified in the parameter Bytes, which contains at least
+// BytesSize number of bytes.  The instruction is at the address specified by
+// the PC parameter.  If a valid instruction can be disassembled its string is
+// returned indirectly in OutString, whose size is specified in the parameter
+// OutStringSize.  This function returns the number of bytes in the
+// instruction or zero if there was no valid instruction.  If this function
+// returns zero the caller will have to pick how many bytes they want to step
+// over by printing a .byte, .long etc. to continue.
+//
+size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
+                             uint64_t BytesSize, uint64_t PC, char *OutString,
+                             size_t OutStringSize) {
+  LLVMDisasmContext *Context = static_cast<LLVMDisasmContext *>(DCR);
+
+  // Present the caller's buffer, based at PC, to the disassembler.
+  DisasmMemoryObject Region(Bytes, BytesSize, PC);
+
+  // Decode one instruction; InstSize is what this function returns on success.
+  MCInst Inst;
+  uint64_t InstSize;
+  if (!Context->getDisAsm()->getInstruction(Inst, InstSize, Region, PC,
+                                            /*REMOVE*/ nulls()))
+    return 0;
+
+  // Print the instruction into a string.
+  std::string Text;
+  raw_string_ostream TextStream(Text);
+  Context->getIP()->printInst(&Inst, TextStream);
+
+  // Copy the text into OutString, limited to OutStringSize by snprintf().
+  snprintf(OutString, OutStringSize, "%s", TextStream.str().c_str());
+  return InstSize;
+}
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus

Added: llvm/trunk/lib/MC/MCDisassembler/Disassembler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCDisassembler/Disassembler.h?rev=128308&view=auto
==============================================================================
--- llvm/trunk/lib/MC/MCDisassembler/Disassembler.h (added)
+++ llvm/trunk/lib/MC/MCDisassembler/Disassembler.h Fri Mar 25 19:06:33 2011
@@ -0,0 +1,90 @@
+//===------------- Disassembler.h - LLVM Disassembler -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Disassembly library's disassembler 
+// context.  The disassembler is responsible for producing strings for
+// individual instructions according to a given architecture and disassembly
+// syntax.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_MC_DISASSEMBLER_H
+#define LLVM_MC_DISASSEMBLER_H
+
+#include "llvm-c/Disassembler.h"
+#include <string>
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+class TargetAsmInfo;
+class MCContext;
+class MCAsmInfo;
+class MCDisassembler;
+class MCInstPrinter;
+class Target;
+class TargetMachine;
+
+//
+// This is the disassembler context returned by LLVMCreateDisasm().
+//
+class LLVMDisasmContext {
+private:
+  //
+  // The passed parameters when the disassembler context is created.
+  //
+  // The TripleName for this disassembler.
+  std::string TripleName;
+  // The pointer to the caller's block of symbolic information.
+  void *DisInfo;
+  // The Triple specific symbolic information type returned by GetOpInfo.
+  int TagType;
+  // The function to get the symbolic information for operands.
+  LLVMOpInfoCallback GetOpInfo;
+  // The function to look up a symbol name.
+  LLVMSymbolLookupCallback SymbolLookUp;
+  //
+  // The objects created and saved by LLVMCreateDisasm() then used by
+  // LLVMDisasmInstruction().
+  //
+  // The LLVM target corresponding to the disassembler.
+  // FIXME: using llvm::OwningPtr<const llvm::Target> causes a malloc error
+  //        when this LLVMDisasmContext is deleted.
+  const Target *TheTarget;
+  // The assembly information for the target architecture.
+  llvm::OwningPtr<const llvm::MCAsmInfo> MAI;
+  // The target machine instance.
+  llvm::OwningPtr<llvm::TargetMachine> TM;
+  // The target assembler information that the MCContext was created with.
+  // FIXME: using llvm::OwningPtr<const llvm::TargetAsmInfo> causes a malloc
+  //        error when this LLVMDisasmContext is deleted.
+  const TargetAsmInfo *Tai;
+  // The assembly context for creating symbols and MCExprs.
+  llvm::OwningPtr<const llvm::MCContext> Ctx;
+  // The disassembler for the target architecture.
+  llvm::OwningPtr<const llvm::MCDisassembler> DisAsm;
+  // The instruction printer for the target architecture.
+  llvm::OwningPtr<llvm::MCInstPrinter> IP;
+
+public:
+  // Takes ownership of mAI, tM, ctx, disAsm and iP, but not theTarget or tai.
+  LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType,
+                    LLVMOpInfoCallback getOpInfo,
+                    LLVMSymbolLookupCallback symbolLookUp,
+                    const Target *theTarget, const MCAsmInfo *mAI,
+                    llvm::TargetMachine *tM, const TargetAsmInfo *tai,
+                    llvm::MCContext *ctx, const MCDisassembler *disAsm,
+                    MCInstPrinter *iP)
+    : TripleName(tripleName), DisInfo(disInfo), TagType(tagType),
+      GetOpInfo(getOpInfo), SymbolLookUp(symbolLookUp), TheTarget(theTarget),
+      MAI(mAI), TM(tM), Tai(tai), Ctx(ctx), DisAsm(disAsm), IP(iP) {}
+  const MCDisassembler *getDisAsm() const { return DisAsm.get(); }
+  MCInstPrinter *getIP() { return IP.get(); }
+};
+
+} // namespace llvm
+#endif // LLVM_MC_DISASSEMBLER_H





More information about the llvm-commits mailing list