[llvm-commits] [llvm] r91749 - in /llvm/trunk: cmake/modules/ lib/Target/X86/ lib/Target/X86/Disassembler/ utils/TableGen/

Sat Dec 19 10:50:30 PST 2009

Hi Sean,

This is causing problems in the x86_64 Linux builds (a circular
dependency in the libraries) and in the AuroraUX build (I'm not sure
about this one: it is some syntax error, but I didn't see the problem;
I suspect a missing type).

I disabled (#if 0'd) it for now, can you take a look?

 - Daniel

On Fri, Dec 18, 2009 at 6:59 PM, Sean Callanan <scallanan at apple.com> wrote:
> Author: spyffe
> Date: Fri Dec 18 20:59:52 2009
> New Revision: 91749
>
> URL: http://llvm.org/viewvc/llvm-project?rev=91749&view=rev
> Log:
> Table-driven disassembler for the X86 architecture (16-, 32-, and 64-bit
> incarnations), integrated into the MC framework.
>
> The disassembler is table-driven, using a custom TableGen backend to
> generate hierarchical tables optimized for fast decode.  The disassembler
> consumes MemoryObjects and produces arrays of MCInsts, adhering to the
> abstract base class MCDisassembler (llvm/MC/MCDisassembler.h).
>
> The disassembler is documented in detail in
>
> - lib/Target/X86/Disassembler/X86Disassembler.cpp (disassembler runtime)
> - utils/TableGen/DisassemblerEmitter.cpp (table emitter)
>
> You can test the disassembler by running llvm-mc -disassemble for i386
> or x86_64 targets.  Please let me know if you encounter any problems
> with it.
>
> Added:
>    llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.h
>    llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
>    llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
>    llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
>    llvm/trunk/utils/TableGen/X86DisassemblerShared.h
>    llvm/trunk/utils/TableGen/X86DisassemblerTables.cpp
>    llvm/trunk/utils/TableGen/X86DisassemblerTables.h
>    llvm/trunk/utils/TableGen/X86ModRMFilters.h
>    llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp
>    llvm/trunk/utils/TableGen/X86RecognizableInstr.h
> Modified:
>    llvm/trunk/cmake/modules/LLVMLibDeps.cmake
>    llvm/trunk/lib/Target/X86/CMakeLists.txt
>    llvm/trunk/lib/Target/X86/Disassembler/CMakeLists.txt
>    llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.cpp
>    llvm/trunk/lib/Target/X86/Makefile
>    llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
>    llvm/trunk/utils/TableGen/CMakeLists.txt
>    llvm/trunk/utils/TableGen/DisassemblerEmitter.cpp
>
> Modified: llvm/trunk/cmake/modules/LLVMLibDeps.cmake
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/cmake/modules/LLVMLibDeps.cmake?rev=91749&r1=91748&r2=91749&view=diff
>
> ==============================================================================
> --- llvm/trunk/cmake/modules/LLVMLibDeps.cmake (original)
> +++ llvm/trunk/cmake/modules/LLVMLibDeps.cmake Fri Dec 18 20:59:52 2009
> @@ -2,7 +2,7 @@
>  set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMARMInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMARMInfo LLVMSupport)
> -set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
> +set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaCodeGen LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport)
>  set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget)
> @@ -11,12 +11,12 @@
>  set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
>  set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem)
> -set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
> +set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMSupport)
>  set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
>  set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMSupport)
> -set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
> +set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMSupport)
>  set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
> @@ -31,7 +31,7 @@
>  set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem)
>  set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
>  set(MSVC_LIB_DEPS_LLVMMSILInfo LLVMSupport)
> -set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget)
> +set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430CodeGen LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport)
>  set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMSystem LLVMTarget)
> @@ -40,17 +40,17 @@
>  set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport)
> -set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget)
> +set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport)
>  set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
>  set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget)
> -set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
> +set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMSupport)
>  set(MSVC_LIB_DEPS_LLVMSupport LLVMSystem)
>  set(MSVC_LIB_DEPS_LLVMSystem )
> -set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZInfo LLVMTarget)
> +set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZCodeGen LLVMSystemZInfo LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget)
>  set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport)
>  set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport LLVMSystem)
>
> Modified: llvm/trunk/lib/Target/X86/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/CMakeLists.txt?rev=91749&r1=91748&r2=91749&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/CMakeLists.txt (original)
> +++ llvm/trunk/lib/Target/X86/CMakeLists.txt Fri Dec 18 20:59:52 2009
> @@ -3,6 +3,7 @@
>  tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header)
>  tablegen(X86GenRegisterNames.inc -gen-register-enums)
>  tablegen(X86GenRegisterInfo.inc -gen-register-desc)
> +tablegen(X86GenDisassemblerTables.inc -gen-disassembler)
>  tablegen(X86GenInstrNames.inc -gen-instr-enums)
>  tablegen(X86GenInstrInfo.inc -gen-instr-desc)
>  tablegen(X86GenAsmWriter.inc -gen-asm-writer)
>
> Modified: llvm/trunk/lib/Target/X86/Disassembler/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/CMakeLists.txt?rev=91749&r1=91748&r2=91749&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/Disassembler/CMakeLists.txt (original)
> +++ llvm/trunk/lib/Target/X86/Disassembler/CMakeLists.txt Fri Dec 18 20:59:52 2009
> @@ -2,5 +2,6 @@
>
>  add_llvm_library(LLVMX86Disassembler
>   X86Disassembler.cpp
> +  X86DisassemblerDecoder.c
>   )
>  add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen)
>
> Modified: llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.cpp?rev=91749&r1=91748&r2=91749&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.cpp (original)
> +++ llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.cpp Fri Dec 18 20:59:52 2009
> @@ -6,18 +6,450 @@
>  // License. See LICENSE.TXT for details.
>  //
>  //===----------------------------------------------------------------------===//
> +//
> +// This file is part of the X86 Disassembler.
> +// It contains code to translate the data produced by the decoder into
> +//  MCInsts.
> +// Documentation for the disassembler can be found in X86Disassembler.h.
> +//
> +//===----------------------------------------------------------------------===//
>
> +#include "X86Disassembler.h"
> +#include "X86DisassemblerDecoder.h"
> +#include "X86InstrInfo.h"
> +
> +#include "llvm/MC/MCDisassembler.h"
>  #include "llvm/MC/MCDisassembler.h"
> +#include "llvm/MC/MCInst.h"
>  #include "llvm/Target/TargetRegistry.h"
> -#include "X86.h"
> +#include "llvm/Support/MemoryObject.h"
> +#include "llvm/Support/ErrorHandling.h"
> +#include "llvm/Support/raw_ostream.h"
>  using namespace llvm;
> +using namespace llvm::X86Disassembler;
> +
> +namespace llvm {
> +
> +// Fill-ins to make the compiler happy.  These constants are never actually
> +//   assigned; they are just filler to make an automatically-generated switch
> +//   statement work.
> +namespace X86 {
> +  enum {
> +    BX_SI = 500,
> +    BX_DI = 501,
> +    BP_SI = 502,
> +    BP_DI = 503,
> +    sib   = 504,
> +    sib64 = 505
> +  };
> +}
> +
> +}
> +
> +static void translateInstruction(MCInst &target,
> +                                 InternalInstruction &source);
> +
> +X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
> +    MCDisassembler(),
> +    fMode(mode) {
> +}
> +
> +X86GenericDisassembler::~X86GenericDisassembler() {
> +}
> +
> +/// regionReader - a callback function that wraps the readByte method from
> +///   MemoryObject.
> +///
> +/// @param arg      - The generic callback parameter.  In this case, this should
> +///                   be a pointer to a MemoryObject.
> +/// @param byte     - A pointer to the byte to be read.
> +/// @param address  - The address to be read.
> +static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
> +  MemoryObject* region = static_cast<MemoryObject*>(arg);
> +  return region->readByte(address, byte);
> +}
> +
> +/// logger - a callback function that wraps the operator<< method from
> +///   raw_ostream.
> +///
> +/// @param arg      - The generic callback parameter.  This should be a pointer
> +///                   to a raw_ostream.
> +/// @param log      - A string to be logged.  logger() adds a newline.
> +static void logger(void* arg, const char* log) {
> +  if (!arg)
> +    return;
> +
> +  raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
> +  vStream << log << "\n";
> +}
> +
> +//
> +// Public interface for the disassembler
> +//
> +
> +bool X86GenericDisassembler::getInstruction(MCInst &instr,
> +                                            uint64_t &size,
> +                                            const MemoryObject &region,
> +                                            uint64_t address,
> +                                            raw_ostream &vStream) const {
> +  InternalInstruction internalInstr;
> +
> +  int ret = decodeInstruction(&internalInstr,
> +                              regionReader,
> +                              (void*)&region,
> +                              logger,
> +                              (void*)&vStream,
> +                              address,
> +                              fMode);
> +
> +  if(ret) {
> +    size = internalInstr.readerCursor - address;
> +    return false;
> +  }
> +  else {
> +    size = internalInstr.length;
> +    translateInstruction(instr, internalInstr);
> +    return true;
> +  }
> +}
> +
> +//
> +// Private code that translates from struct InternalInstructions to MCInsts.
> +//
> +
> +/// translateRegister - Translates an internal register to the appropriate LLVM
> +///   register, and appends it as an operand to an MCInst.
> +///
> +/// @param mcInst     - The MCInst to append to.
> +/// @param reg        - The Reg to append.
> +static void translateRegister(MCInst &mcInst, Reg reg) {
> +#define ENTRY(x) X86::x,
> +  uint8_t llvmRegnums[] = {
> +    ALL_REGS
> +    0
> +  };
> +#undef ENTRY
> +
> +  uint8_t llvmRegnum = llvmRegnums[reg];
> +  mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
> +}
> +
> +/// translateImmediate  - Appends an immediate operand to an MCInst.
> +///
> +/// @param mcInst       - The MCInst to append to.
> +/// @param immediate    - The immediate value to append.
> +static void translateImmediate(MCInst &mcInst, uint64_t immediate) {
> +  mcInst.addOperand(MCOperand::CreateImm(immediate));
> +}
> +
> +/// translateRMRegister - Translates a register stored in the R/M field of the
> +///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
> +/// @param mcInst       - The MCInst to append to.
> +/// @param insn         - The internal instruction to extract the R/M field
> +///                       from.
> +static void translateRMRegister(MCInst &mcInst,
> +                                InternalInstruction &insn) {
> +  assert(insn.eaBase != EA_BASE_sib && insn.eaBase != EA_BASE_sib64 &&
> +         "A R/M register operand may not have a SIB byte");
> +
> +  switch (insn.eaBase) {
> +  case EA_BASE_NONE:
> +    llvm_unreachable("EA_BASE_NONE for ModR/M base");
> +    break;
> +#define ENTRY(x) case EA_BASE_##x:
> +  ALL_EA_BASES
> +#undef ENTRY
> +    llvm_unreachable("A R/M register operand may not have a base; "
> +                     "the operand must be a register.");
> +    break;
> +#define ENTRY(x)                                                        \
> +  case EA_REG_##x:                                                    \
> +    mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
> +  ALL_REGS
> +#undef ENTRY
> +  default:
> +    llvm_unreachable("Unexpected EA base register");
> +  }
> +}
> +
> +/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
> +///   fields of an internal instruction (and possibly its SIB byte) to a memory
> +///   operand in LLVM's format, and appends it to an MCInst.
> +///
> +/// @param mcInst       - The MCInst to append to.
> +/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
> +///                       from.
> +static void translateRMMemory(MCInst &mcInst,
> +                              InternalInstruction &insn) {
> +  // Addresses in an MCInst are represented as five operands:
> +  //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
> +  //                                SIB base
> +  //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
> +  //                                scale amount
> +  //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
> +  //                                the index (which is multiplied by the
> +  //                                scale amount)
> +  //   4. displacement  (immediate) 0, or the displacement if there is one
> +  //   5. segmentreg    (register)  x86_registerNONE for now, but could be set
> +  //                                if we have segment overrides
> +
> +  MCOperand baseReg;
> +  MCOperand scaleAmount;
> +  MCOperand indexReg;
> +  MCOperand displacement;
> +  MCOperand segmentReg;
> +
> +  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
> +    if (insn.sibBase != SIB_BASE_NONE) {
> +      switch (insn.sibBase) {
> +      default:
> +        llvm_unreachable("Unexpected sibBase");
> +#define ENTRY(x)                                          \
> +      case SIB_BASE_##x:                                \
> +        baseReg = MCOperand::CreateReg(X86::x); break;
> +      ALL_SIB_BASES
> +#undef ENTRY
> +      }
> +    } else {
> +      baseReg = MCOperand::CreateReg(0);
> +    }
> +
> +    if (insn.sibIndex != SIB_INDEX_NONE) {
> +      switch (insn.sibIndex) {
> +      default:
> +        llvm_unreachable("Unexpected sibIndex");
> +#define ENTRY(x)                                            \
> +      case SIB_INDEX_##x:                                 \
> +        indexReg = MCOperand::CreateReg(X86::x); break;
> +      EA_BASES_32BIT
> +      EA_BASES_64BIT
> +#undef ENTRY
> +      }
> +    } else {
> +      indexReg = MCOperand::CreateReg(0);
> +    }
> +
> +    scaleAmount = MCOperand::CreateImm(insn.sibScale);
> +  } else {
> +    switch (insn.eaBase) {
> +    case EA_BASE_NONE:
> +      assert(insn.eaDisplacement != EA_DISP_NONE &&
> +             "EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
> +
> +      if (insn.mode == MODE_64BIT)
> +        baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
> +      else
> +        baseReg = MCOperand::CreateReg(0);
> +
> +      indexReg = MCOperand::CreateReg(0);
> +      break;
> +    case EA_BASE_BX_SI:
> +      baseReg = MCOperand::CreateReg(X86::BX);
> +      indexReg = MCOperand::CreateReg(X86::SI);
> +      break;
> +    case EA_BASE_BX_DI:
> +      baseReg = MCOperand::CreateReg(X86::BX);
> +      indexReg = MCOperand::CreateReg(X86::DI);
> +      break;
> +    case EA_BASE_BP_SI:
> +      baseReg = MCOperand::CreateReg(X86::BP);
> +      indexReg = MCOperand::CreateReg(X86::SI);
> +      break;
> +    case EA_BASE_BP_DI:
> +      baseReg = MCOperand::CreateReg(X86::BP);
> +      indexReg = MCOperand::CreateReg(X86::DI);
> +      break;
> +    default:
> +      indexReg = MCOperand::CreateReg(0);
> +      switch (insn.eaBase) {
> +      default:
> +        llvm_unreachable("Unexpected eaBase");
> +        break;
> +        // Here, we will use the fill-ins defined above.  However,
> +        //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
> +        //   sib and sib64 were handled in the top-level if, so they're only
> +        //   placeholders to keep the compiler happy.
> +#define ENTRY(x)                                        \
> +      case EA_BASE_##x:                                 \
> +        baseReg = MCOperand::CreateReg(X86::x); break;
> +      ALL_EA_BASES
> +#undef ENTRY
> +#define ENTRY(x) case EA_REG_##x:
> +      ALL_REGS
> +#undef ENTRY
> +        llvm_unreachable("A R/M memory operand may not be a register; "
> +                         "the base field must be a base.");
> +            break;
> +      }
> +    }
> +  }
> +
> +  displacement = MCOperand::CreateImm(insn.displacement);
> +
> +  static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
> +    0,        // SEG_OVERRIDE_NONE
> +    X86::CS,
> +    X86::SS,
> +    X86::DS,
> +    X86::ES,
> +    X86::FS,
> +    X86::GS
> +  };
> +
> +  segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
> +
> +  mcInst.addOperand(baseReg);
> +  mcInst.addOperand(scaleAmount);
> +  mcInst.addOperand(indexReg);
> +  mcInst.addOperand(displacement);
> +  mcInst.addOperand(segmentReg);
> +}
> +
> +/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
> +///   byte of an instruction to LLVM form, and appends it to an MCInst.
> +///
> +/// @param mcInst       - The MCInst to append to.
> +/// @param operand      - The operand, as stored in the descriptor table.
> +/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
> +///                       from.
> +static void translateRM(MCInst &mcInst,
> +                        OperandSpecifier &operand,
> +                        InternalInstruction &insn) {
> +  switch (operand.type) {
> +  default:
> +    llvm_unreachable("Unexpected type for a R/M operand");
> +  case TYPE_R8:
> +  case TYPE_R16:
> +  case TYPE_R32:
> +  case TYPE_R64:
> +  case TYPE_Rv:
> +  case TYPE_MM:
> +  case TYPE_MM32:
> +  case TYPE_MM64:
> +  case TYPE_XMM:
> +  case TYPE_XMM32:
> +  case TYPE_XMM64:
> +  case TYPE_XMM128:
> +  case TYPE_DEBUGREG:
> +  case TYPE_CR32:
> +  case TYPE_CR64:
> +    translateRMRegister(mcInst, insn);
> +    break;
> +  case TYPE_M:
> +  case TYPE_M8:
> +  case TYPE_M16:
> +  case TYPE_M32:
> +  case TYPE_M64:
> +  case TYPE_M128:
> +  case TYPE_M512:
> +  case TYPE_Mv:
> +  case TYPE_M32FP:
> +  case TYPE_M64FP:
> +  case TYPE_M80FP:
> +  case TYPE_M16INT:
> +  case TYPE_M32INT:
> +  case TYPE_M64INT:
> +  case TYPE_M1616:
> +  case TYPE_M1632:
> +  case TYPE_M1664:
> +    translateRMMemory(mcInst, insn);
> +    break;
> +  }
> +}
> +
> +/// translateFPRegister - Translates a stack position on the FPU stack to its
> +///   LLVM form, and appends it to an MCInst.
> +///
> +/// @param mcInst       - The MCInst to append to.
> +/// @param stackPos     - The stack position to translate.
> +static void translateFPRegister(MCInst &mcInst,
> +                                uint8_t stackPos) {
> +  assert(stackPos < 8 && "Invalid FP stack position");
> +
> +  mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
> +}
> +
> +/// translateOperand - Translates an operand stored in an internal instruction
> +///   to LLVM's format and appends it to an MCInst.
> +///
> +/// @param mcInst       - The MCInst to append to.
> +/// @param operand      - The operand, as stored in the descriptor table.
> +/// @param insn         - The internal instruction.
> +static void translateOperand(MCInst &mcInst,
> +                             OperandSpecifier &operand,
> +                             InternalInstruction &insn) {
> +  switch (operand.encoding) {
> +  default:
> +    llvm_unreachable("Unhandled operand encoding during translation");
> +  case ENCODING_REG:
> +    translateRegister(mcInst, insn.reg);
> +    break;
> +  case ENCODING_RM:
> +    translateRM(mcInst, operand, insn);
> +    break;
> +  case ENCODING_CB:
> +  case ENCODING_CW:
> +  case ENCODING_CD:
> +  case ENCODING_CP:
> +  case ENCODING_CO:
> +  case ENCODING_CT:
> +    llvm_unreachable("Translation of code offsets isn't supported.");
> +  case ENCODING_IB:
> +  case ENCODING_IW:
> +  case ENCODING_ID:
> +  case ENCODING_IO:
> +  case ENCODING_Iv:
> +  case ENCODING_Ia:
> +    translateImmediate(mcInst,
> +                       insn.immediates[insn.numImmediatesTranslated++]);
> +    break;
> +  case ENCODING_RB:
> +  case ENCODING_RW:
> +  case ENCODING_RD:
> +  case ENCODING_RO:
> +    translateRegister(mcInst, insn.opcodeRegister);
> +    break;
> +  case ENCODING_I:
> +    translateFPRegister(mcInst, insn.opcodeModifier);
> +    break;
> +  case ENCODING_Rv:
> +    translateRegister(mcInst, insn.opcodeRegister);
> +    break;
> +  case ENCODING_DUP:
> +    translateOperand(mcInst,
> +                     insn.spec->operands[operand.type - TYPE_DUP0],
> +                     insn);
> +    break;
> +  }
> +}
> +
> +/// translateInstruction - Translates an internal instruction and all its
> +///   operands to an MCInst.
> +///
> +/// @param mcInst       - The MCInst to populate with the instruction's data.
> +/// @param insn         - The internal instruction.
> +static void translateInstruction(MCInst &mcInst,
> +                                 InternalInstruction &insn) {
> +  assert(insn.spec);
> +
> +  mcInst.setOpcode(insn.instructionID);
> +
> +  int index;
> +
> +  insn.numImmediatesTranslated = 0;
> +
> +  for (index = 0; index < X86_MAX_OPERANDS; ++index) {
> +    if (insn.spec->operands[index].encoding != ENCODING_NONE)
> +      translateOperand(mcInst, insn.spec->operands[index], insn);
> +  }
> +}
>
>  static const MCDisassembler *createX86_32Disassembler(const Target &T) {
> -  return 0;
> +  return new X86Disassembler::X86_32Disassembler;
>  }
>
>  static const MCDisassembler *createX86_64Disassembler(const Target &T) {
> -  return 0;
> +  return new X86Disassembler::X86_64Disassembler;
>  }
>
>  extern "C" void LLVMInitializeX86Disassembler() {
>
> Added: llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.h?rev=91749&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.h (added)
> +++ llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.h Fri Dec 18 20:59:52 2009
> @@ -0,0 +1,150 @@
> +//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
> +// 64-bit X86 instruction sets.  The main decode sequence for an assembly
> +// instruction in this disassembler is:
> +//
> +// 1. Read the prefix bytes and determine the attributes of the instruction.
> +//    These attributes, recorded in enum attributeBits
> +//    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM
> +//    provides a mapping from bitmasks to contexts, which are represented by
> +//    enum InstructionContext (ibid.).
> +//
> +// 2. Read the opcode, and determine what kind of opcode it is.  The
> +//    disassembler distinguishes four kinds of opcodes, which are enumerated in
> +//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
> +//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
> +//    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context.
> +//
> +// 3. Depending on the opcode type, look in one of four ClassDecision structures
> +//    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which
> +//    OpcodeDecision (ibid.) to look the opcode up in.  Look up the opcode to get
> +//    a ModRMDecision (ibid.).
> +//
> +// 4. Some instructions, such as escape opcodes or extended opcodes, or even
> +//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
> +//    ModR/M byte to complete decode.  The ModRMDecision's type is an entry from
> +//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
> +//    ModR/M byte is required and how to interpret it.
> +//
> +// 5. After resolving the ModRMDecision, the disassembler has a unique ID
> +//    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in
> +//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
> +//    meanings of its operands.
> +//
> +// 6. For each operand, its encoding is an entry from OperandEncoding
> +//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
> +//    OperandType (ibid.).  The encoding indicates how to read it from the
> +//    instruction; the type indicates how to interpret the value once it has
> +//    been read.  For example, a register operand could be stored in the R/M
> +//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
> +//    the main opcode.  This is orthogonal to its meaning (a GPR or an XMM
> +//    register, for instance).  Given this information, the operands can be
> +//    extracted and interpreted.
> +//
> +// 7. As the last step, the disassembler translates the instruction information
> +//    and operands into a format understandable by the client - in this case, an
> +//    MCInst for use by the MC infrastructure.
> +//
> +// The disassembler is broken broadly into two parts: the table emitter that
> +// emits the instruction decode tables discussed above during compilation, and
> +// the disassembler itself.  The table emitter is documented in more detail in
> +// utils/TableGen/DisassemblerEmitter.cpp.
> +//
> +// X86Disassembler.h contains the public interface for the disassembler,
> +//   adhering to the MCDisassembler interface.
> +// X86Disassembler.cpp contains the code responsible for step 7, and for
> +//   invoking the decoder to execute steps 1-6.
> +// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
> +//   table emitter and the disassembler.
> +// X86DisassemblerDecoder.h contains the public interface of the decoder,
> +//   factored out into C for possible use by other projects.
> +// X86DisassemblerDecoder.c contains the source code of the decoder, which is
> +//   responsible for steps 1-6.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef X86DISASSEMBLER_H
> +#define X86DISASSEMBLER_H
> +
> +#define INSTRUCTION_SPECIFIER_FIELDS  \
> +  const char*             name;
> +
> +#define INSTRUCTION_IDS               \
> +  InstrUID*  instructionIDs;
> +
> +#include "X86DisassemblerDecoderCommon.h"
> +
> +#undef INSTRUCTION_SPECIFIER_FIELDS
> +#undef INSTRUCTION_IDS
> +
> +#include "llvm/MC/MCDisassembler.h"
> +
> +struct InternalInstruction;
> +
> +namespace llvm {
> +
> +class MCInst;
> +class MemoryObject;
> +class raw_ostream;
> +
> +namespace X86Disassembler {
> +
> +/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
> +///   Each platform class only needs to define a constructor that passes the
> +///   appropriate DisassemblerMode value to this class.
> +class X86GenericDisassembler : public MCDisassembler {
> +protected:
> +  /// Constructor     - Initializes the disassembler.
> +  ///
> +  /// @param mode     - The X86 architecture mode to decode for.
> +  X86GenericDisassembler(DisassemblerMode mode);
> +public:
> +  ~X86GenericDisassembler();
> +
> +  /// getInstruction - See MCDisassembler.
> +  bool getInstruction(MCInst &instr,
> +                      uint64_t &size,
> +                      const MemoryObject &region,
> +                      uint64_t address,
> +                      raw_ostream &vStream) const;
> +private:
> +  DisassemblerMode              fMode;
> +};
> +
> +/// X86_16Disassembler - 16-bit X86 disassembler (decodes in MODE_16BIT).
> +class X86_16Disassembler : public X86GenericDisassembler {
> +public:
> +  X86_16Disassembler() :
> +    X86GenericDisassembler(MODE_16BIT) {
> +  }
> +};
> +
> +/// X86_32Disassembler - 32-bit X86 disassembler (decodes in MODE_32BIT).
> +class X86_32Disassembler : public X86GenericDisassembler {
> +public:
> +  X86_32Disassembler() :
> +    X86GenericDisassembler(MODE_32BIT) {
> +  }
> +};
> +
> +/// X86_64Disassembler - 64-bit X86 disassembler (decodes in MODE_64BIT).
> +class X86_64Disassembler : public X86GenericDisassembler {
> +public:
> +  X86_64Disassembler() :
> +    X86GenericDisassembler(MODE_64BIT) {
> +  }
> +};
> +
> +} // namespace X86Disassembler
> +
> +} // namespace llvm
> +
> +#endif
>
> Added: llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c?rev=91749&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c (added)
> +++ llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c Fri Dec 18 20:59:52 2009
> @@ -0,0 +1,1361 @@
> +/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
> + *
> + *                     The LLVM Compiler Infrastructure
> + *
> + * This file is distributed under the University of Illinois Open Source
> + * License. See LICENSE.TXT for details.
> + *
> + *===----------------------------------------------------------------------===*
> + *
> + * This file is part of the X86 Disassembler.
> + * It contains the implementation of the instruction decoder.
> + * Documentation for the disassembler can be found in X86Disassembler.h.
> + *
> + *===----------------------------------------------------------------------===*/
> +
> +#include <assert.h>   /* for assert()     */
> +#include <stdarg.h>   /* for va_*()       */
> +#include <stdio.h>    /* for vsnprintf()  */
> +#include <stdlib.h>   /* for exit()       */
> +#include <string.h>   /* for bzero()      */
> +
> +#include "X86DisassemblerDecoder.h"
> +
> +#include "X86GenDisassemblerTables.inc"
> +
> +#define TRUE  1
> +#define FALSE 0
> +
> +#ifdef __GNUC__
> +#define NORETURN __attribute__((noreturn))
> +#else
> +#define NORETURN
> +#endif
> +
> +/*
> + * unreachable - Reports an internal error and terminates the process.  Prints
> + *   the current file, line, and the message s to stderr, then calls exit(-1).
> + *
> + * NOTE: the definition ends with a ';' after "while (0)", so an invocation
> + *   written as "unreachable(...);" expands to two statements.  Do not use it
> + *   as the unbraced body of an if that is followed by an else.
> + */
> +#define unreachable(s)                                      \
> +  do {                                                      \
> +    fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, s);  \
> +    exit(-1);                                               \
> +  } while (0);
> +
> +/*
> + * contextForAttrs - Looks up the decode context for a set of attributes in
> + *   the instruction context table.
> + *
> + * @param attrMask  - Attributes, from the enumeration attributeBits.
> + * @return          - The InstructionContext to use when looking up an
> + *                    instruction with these attributes.
> + */
> +static inline InstructionContext contextForAttrs(uint8_t attrMask) {
> +  const InstructionContext context = CONTEXTS_SYM[attrMask];
> +
> +  return context;
> +}
> +
> +/*
> + * modRMRequired - Reads the appropriate instruction table to determine whether
> + *   the ModR/M byte is required to decode a particular instruction.
> + *
> + * @param type        - The opcode type (i.e., how many bytes it has).
> + * @param insnContext - The context for the instruction, as returned by
> + *                      contextForAttrs.
> + * @param opcode      - The last byte of the instruction's opcode, not counting
> + *                      ModR/M extensions and escapes.
> + * @return            - TRUE if the ModR/M byte is required, FALSE otherwise.
> + */
> +static inline int modRMRequired(OpcodeType type,
> +                                InstructionContext insnContext,
> +                                uint8_t opcode) {
> +  const struct ContextDecision* decision;
> +
> +  switch (type) {
> +  default:
> +    /*
> +     * Without this case an unknown opcode type would leave `decision`
> +     * uninitialized and the lookup below would dereference garbage.
> +     */
> +    unreachable("Unknown opcode type");
> +    return 0;
> +  case ONEBYTE:
> +    decision = &ONEBYTE_SYM;
> +    break;
> +  case TWOBYTE:
> +    decision = &TWOBYTE_SYM;
> +    break;
> +  case THREEBYTE_38:
> +    decision = &THREEBYTE38_SYM;
> +    break;
> +  case THREEBYTE_3A:
> +    decision = &THREEBYTE3A_SYM;
> +    break;
> +  }
> +
> +  /* Any table entry other than a single-instruction one needs the ModR/M. */
> +  return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
> +    modrm_type != MODRM_ONEENTRY;
> +}
> +
> +/*
> + * decode - Reads the appropriate instruction table to obtain the unique ID of
> + *   an instruction.
> + *
> + * @param type        - See modRMRequired().
> + * @param insnContext - See modRMRequired().
> + * @param opcode      - See modRMRequired().
> + * @param modRM       - The ModR/M byte if required, or any value if not.
> + * @return            - The unique ID stored in the table for this encoding.
> + */
> +static inline InstrUID decode(OpcodeType type,
> +                               InstructionContext insnContext,
> +                               uint8_t opcode,
> +                               uint8_t modRM) {
> +  /* The tables are only read here, so both pointers are const. */
> +  const struct ContextDecision* table;
> +  const struct ModRMDecision* dec;
> +
> +  switch (type) {
> +  default:
> +    unreachable("Unknown opcode type");
> +    return 0;
> +  case ONEBYTE:
> +    table = &ONEBYTE_SYM;
> +    break;
> +  case TWOBYTE:
> +    table = &TWOBYTE_SYM;
> +    break;
> +  case THREEBYTE_38:
> +    table = &THREEBYTE38_SYM;
> +    break;
> +  case THREEBYTE_3A:
> +    table = &THREEBYTE3A_SYM;
> +    break;
> +  }
> +
> +  dec = &table->opcodeDecisions[insnContext].modRMDecisions[opcode];
> +
> +  switch (dec->modrm_type) {
> +  default:
> +    unreachable("Corrupt table!  Unknown modrm_type");
> +    return 0;
> +  case MODRM_ONEENTRY:
> +    /* One instruction regardless of the ModR/M byte. */
> +    return dec->instructionIDs[0];
> +  case MODRM_SPLITRM:
> +    /* Entry 1 is used when mod == 0b11, entry 0 for every other mod value. */
> +    if (modFromModRM(modRM) == 0x3)
> +      return dec->instructionIDs[1];
> +    else
> +      return dec->instructionIDs[0];
> +  case MODRM_FULL:
> +    /* One entry per possible ModR/M byte value. */
> +    return dec->instructionIDs[modRM];
> +  }
> +
> +  return 0;
> +}
> +
> +/*
> + * specifierForUID - Maps an instruction UID to its entry in the instruction
> + *   table, which holds the name and operand specification.
> + *
> + * @param uid - The unique ID for the instruction.  This should be returned by
> + *              decode(); no bounds check is performed here.
> + * @return    - A pointer to the specification for that instruction.
> + */
> +static inline struct InstructionSpecifier* specifierForUID(InstrUID uid) {
> +  struct InstructionSpecifier* const spec = &INSTRUCTIONS_SYM[uid];
> +
> +  return spec;
> +}
> +
> +/*
> + * consumeByte - Reads one byte at the instruction's cursor through the
> + *   user-supplied reader function and, on success, moves the cursor past it.
> + *
> + * @param insn  - The instruction with the reader function to use.  Its cursor
> + *                is advanced only when the read succeeds.
> + * @param byte  - A pointer to a pre-allocated memory buffer to be populated
> + *                with the data read.
> + * @return      - 0 if the read was successful; nonzero otherwise.
> + */
> +static inline int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
> +  const int status = insn->reader(insn->readerArg, byte, insn->readerCursor);
> +
> +  if (status)
> +    return status;
> +
> +  insn->readerCursor++;
> +  return status;
> +}
> +
> +/*
> + * lookAtByte - Peeks at the byte under the cursor: same read as consumeByte,
> + *   but the cursor is left where it is.
> + *
> + * @param insn  - See consumeByte().
> + * @param byte  - See consumeByte().
> + * @return      - See consumeByte().
> + */
> +static inline int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
> +  const int status = insn->reader(insn->readerArg, byte, insn->readerCursor);
> +
> +  return status;
> +}
> +
> +/*
> + * unconsumeByte - Moves the instruction's cursor back by one byte.
> + *
> + * @param insn  - The instruction whose cursor is to be moved back.
> + */
> +static inline void unconsumeByte(struct InternalInstruction* insn) {
> +  insn->readerCursor -= 1;
> +}
> +
> +/*
> + * CONSUME_FUNC - Defines a reader named `name` that assembles a value of
> + *   `type` from sizeof(type) consecutive bytes, least-significant byte first.
> + *   The cursor is advanced only after every byte has been read successfully;
> + *   on a failed read the reader's status is returned and *ptr is untouched.
> + */
> +#define CONSUME_FUNC(name, type)                                          \
> +  static inline int name(struct InternalInstruction* insn, type* ptr) {   \
> +    type value = 0;                                                       \
> +    unsigned i;                                                           \
> +    for (i = 0; i < sizeof(type); ++i) {                                  \
> +      uint8_t b;                                                          \
> +      int status = insn->reader(insn->readerArg,                          \
> +                                &b,                                       \
> +                                insn->readerCursor + i);                  \
> +      if (status)                                                         \
> +        return status;                                                    \
> +      value = value | ((type)b << ((type)i * 8));                         \
> +    }                                                                     \
> +    *ptr = value;                                                         \
> +    insn->readerCursor += sizeof(type);                                   \
> +    return 0;                                                             \
> +  }
> +
> +/*
> + * consume* - Use the reader function provided by the user to consume data
> + *   values of various sizes from the instruction's memory and advance the
> + *   cursor appropriately.  These readers perform endian conversion.
> + *
> + * @param insn    - See consumeByte().
> + * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
> + *                  be populated with the data read.
> + * @return        - See consumeByte().
> + */
> +CONSUME_FUNC(consumeInt8, int8_t)
> +CONSUME_FUNC(consumeInt16, int16_t)
> +CONSUME_FUNC(consumeInt32, int32_t)
> +CONSUME_FUNC(consumeUInt16, uint16_t)
> +CONSUME_FUNC(consumeUInt32, uint32_t)
> +CONSUME_FUNC(consumeUInt64, uint64_t)
> +
> +/*
> + * dprintf - Formats a message printf-style into a fixed 256-byte buffer and
> + *   hands it to the logging function provided by the user.  Does nothing when
> + *   no logging function is installed.  Output longer than the buffer is cut
> + *   off by vsnprintf().
> + *
> + * @param insn    - The instruction containing the logging function.
> + * @param format  - See printf().
> + * @param ...     - See printf().
> + */
> +static inline void dprintf(struct InternalInstruction* insn,
> +                           const char* format,
> +                           ...) {
> +  if (insn->dlog) {
> +    char message[256];
> +    va_list args;
> +
> +    va_start(args, format);
> +    (void)vsnprintf(message, sizeof(message), format, args);
> +    va_end(args);
> +
> +    insn->dlog(insn->dlogArg, message);
> +  }
> +}
> +
> +/*
> + * setPrefixPresent - Records that a prefix byte was seen and where it was
> + *   seen.  Both tables are indexed by the prefix byte value itself.
> + *
> + * @param insn      - The instruction to be marked as having the prefix.
> + * @param prefix    - The prefix that is present.
> + * @param location  - The location where the prefix is located (in the address
> + *                    space of the instruction's reader).
> + */
> +static inline void setPrefixPresent(struct InternalInstruction* insn,
> +                                    uint8_t prefix,
> +                                    uint64_t location) {
> +  insn->prefixLocations[prefix] = location;
> +  insn->prefixPresent[prefix] = 1;
> +}
> +
> +/*
> + * isPrefixAtLocation - Reports whether a prefix was recorded for this
> + *   instruction at exactly the given location.
> + *
> + * @param insn      - The instruction to be queried.
> + * @param prefix    - The prefix.
> + * @param location  - The location to query.
> + * @return          - TRUE if the prefix is marked present and its recorded
> + *                    location equals `location`; FALSE otherwise.
> + */
> +static inline BOOL isPrefixAtLocation(struct InternalInstruction* insn,
> +                                      uint8_t prefix,
> +                                      uint64_t location) {
> +  if (insn->prefixPresent[prefix] != 1)
> +    return FALSE;
> +
> +  return (insn->prefixLocations[prefix] == location) ? TRUE : FALSE;
> +}
> +
> +/*
> + * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
> + *   instruction as having them.  Also sets the instruction's default operand,
> + *   address, and other relevant data sizes to report operands correctly.
> + *
> + *   A second prefix from an already-seen group is only logged, not rejected.
> + *   In 64-bit mode, a REX byte (0x40-0x4f) that is followed by another byte in
> + *   the same range, or by nothing readable, is rejected.  On success the
> + *   cursor is left on the first byte after the prefixes.
> + *
> + * @param insn  - The instruction whose prefixes are to be read.
> + * @return      - 0 if the instruction could be read until the end of the prefix
> + *                bytes, and no prefixes conflicted; nonzero otherwise.
> + */
> +static int readPrefixes(struct InternalInstruction* insn) {
> +  BOOL isPrefix = TRUE;
> +  BOOL prefixGroups[4] = { FALSE };
> +  uint64_t prefixLocation;
> +  uint8_t byte;
> +
> +  BOOL hasAdSize = FALSE;
> +  BOOL hasOpSize = FALSE;
> +
> +  dprintf(insn, "readPrefixes()");
> +
> +  while (isPrefix) {
> +    prefixLocation = insn->readerCursor;
> +
> +    if (consumeByte(insn, &byte))
> +      return -1;
> +
> +    switch (byte) {
> +    case 0xf0:  /* LOCK */
> +    case 0xf2:  /* REPNE/REPNZ */
> +    case 0xf3:  /* REP or REPE/REPZ */
> +      if (prefixGroups[0])
> +        dprintf(insn, "Redundant Group 1 prefix");
> +      prefixGroups[0] = TRUE;
> +      setPrefixPresent(insn, byte, prefixLocation);
> +      break;
> +    case 0x2e:  /* CS segment override -OR- Branch not taken */
> +    case 0x36:  /* SS segment override -OR- Branch taken */
> +    case 0x3e:  /* DS segment override */
> +    case 0x26:  /* ES segment override */
> +    case 0x64:  /* FS segment override */
> +    case 0x65:  /* GS segment override */
> +      switch (byte) {
> +      case 0x2e:
> +        insn->segmentOverride = SEG_OVERRIDE_CS;
> +        break;
> +      case 0x36:
> +        insn->segmentOverride = SEG_OVERRIDE_SS;
> +        break;
> +      case 0x3e:
> +        insn->segmentOverride = SEG_OVERRIDE_DS;
> +        break;
> +      case 0x26:
> +        insn->segmentOverride = SEG_OVERRIDE_ES;
> +        break;
> +      case 0x64:
> +        insn->segmentOverride = SEG_OVERRIDE_FS;
> +        break;
> +      case 0x65:
> +        insn->segmentOverride = SEG_OVERRIDE_GS;
> +        break;
> +      default:
> +        unreachable("Unhandled override");
> +      }
> +      if (prefixGroups[1])
> +        dprintf(insn, "Redundant Group 2 prefix");
> +      prefixGroups[1] = TRUE;
> +      setPrefixPresent(insn, byte, prefixLocation);
> +      break;
> +    case 0x66:  /* Operand-size override */
> +      if (prefixGroups[2])
> +        dprintf(insn, "Redundant Group 3 prefix");
> +      prefixGroups[2] = TRUE;
> +      hasOpSize = TRUE;
> +      setPrefixPresent(insn, byte, prefixLocation);
> +      break;
> +    case 0x67:  /* Address-size override */
> +      if (prefixGroups[3])
> +        dprintf(insn, "Redundant Group 4 prefix");
> +      prefixGroups[3] = TRUE;
> +      hasAdSize = TRUE;
> +      setPrefixPresent(insn, byte, prefixLocation);
> +      break;
> +    default:    /* Not a prefix byte */
> +      isPrefix = FALSE;
> +      break;
> +    }
> +
> +    if (isPrefix)
> +      dprintf(insn, "Found prefix 0x%hhx", byte);
> +  }
> +
> +  /*
> +   * `byte` now holds the first non-prefix byte, which has already been
> +   * consumed.  It is either kept (as a REX prefix) or pushed back.
> +   */
> +  if (insn->mode == MODE_64BIT) {
> +    if ((byte & 0xf0) == 0x40) {
> +      uint8_t opcodeByte;
> +
> +      if(lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
> +        dprintf(insn, "Redundant REX prefix");
> +        return -1;
> +      }
> +
> +      insn->rexPrefix = byte;
> +      /* The cursor is one past REX, so -2 is the byte just before REX. */
> +      insn->necessaryPrefixLocation = insn->readerCursor - 2;
> +
> +      dprintf(insn, "Found REX prefix 0x%hhx", byte);
> +    } else {
> +      unconsumeByte(insn);
> +      /* The cursor is on the opcode, so -1 is the byte just before it. */
> +      insn->necessaryPrefixLocation = insn->readerCursor - 1;
> +    }
> +  } else {
> +    unconsumeByte(insn);
> +  }
> +
> +  if (insn->mode == MODE_16BIT) {
> +    insn->registerSize       = (hasOpSize ? 4 : 2);
> +    insn->addressSize        = (hasAdSize ? 4 : 2);
> +    insn->displacementSize   = (hasAdSize ? 4 : 2);
> +    insn->immediateSize      = (hasOpSize ? 4 : 2);
> +  } else if (insn->mode == MODE_32BIT) {
> +    insn->registerSize       = (hasOpSize ? 2 : 4);
> +    insn->addressSize        = (hasAdSize ? 2 : 4);
> +    insn->displacementSize   = (hasAdSize ? 2 : 4);
> +    /*
> +     * Immediates follow the operand size, as in the other modes; selecting
> +     * on the address-size prefix here would mis-size them.
> +     */
> +    insn->immediateSize      = (hasOpSize ? 2 : 4);
> +  } else if (insn->mode == MODE_64BIT) {
> +    if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
> +      insn->registerSize       = 8;
> +      insn->addressSize        = (hasAdSize ? 4 : 8);
> +      insn->displacementSize   = 4;
> +      insn->immediateSize      = 4;
> +    } else if (insn->rexPrefix) {
> +      insn->registerSize       = (hasOpSize ? 2 : 4);
> +      insn->addressSize        = (hasAdSize ? 4 : 8);
> +      insn->displacementSize   = (hasOpSize ? 2 : 4);
> +      insn->immediateSize      = (hasOpSize ? 2 : 4);
> +    } else {
> +      insn->registerSize       = (hasOpSize ? 2 : 4);
> +      insn->addressSize        = (hasAdSize ? 4 : 8);
> +      insn->displacementSize   = (hasOpSize ? 2 : 4);
> +      insn->immediateSize      = (hasOpSize ? 2 : 4);
> +    }
> +  }
> +
> +  return 0;
> +}
> +
> +/*
> + * readOpcode - Consumes the opcode bytes of an instruction: an optional 0x0f
> + *   escape, an optional 0x38 or 0x3a second escape, and the final opcode byte
> + *   (the ModR/M byte of extended or escape opcodes is not read here).
> + *
> + * @param insn  - The instruction whose opcode is to be read.
> + * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
> + */
> +static int readOpcode(struct InternalInstruction* insn) {
> +  uint8_t current;
> +
> +  dprintf(insn, "readOpcode()");
> +
> +  /* Assume a one-byte opcode until an escape byte says otherwise. */
> +  insn->opcodeType = ONEBYTE;
> +  if (consumeByte(insn, &current))
> +    return -1;
> +
> +  if (current == 0x0f) {
> +    dprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
> +
> +    insn->twoByteEscape = current;
> +
> +    if (consumeByte(insn, &current))
> +      return -1;
> +
> +    if (current == 0x38 || current == 0x3a) {
> +      const uint8_t escape = current;
> +
> +      dprintf(insn, "Found a three-byte escape prefix (0x%hhx)", escape);
> +
> +      insn->threeByteEscape = escape;
> +
> +      if (consumeByte(insn, &current))
> +        return -1;
> +
> +      /* The opcode type is only updated once the opcode byte was read. */
> +      insn->opcodeType = (escape == 0x38 ? THREEBYTE_38 : THREEBYTE_3A);
> +    } else {
> +      dprintf(insn, "Didn't find a three-byte escape prefix");
> +
> +      insn->opcodeType = TWOBYTE;
> +    }
> +  }
> +
> +  /*
> +   * At this point we have consumed the full opcode.
> +   * Anything we consume from here on must be unconsumed.
> +   */
> +
> +  insn->opcode = current;
> +
> +  return 0;
> +}
> +
> +static int readModRM(struct InternalInstruction* insn);
> +
> +/*
> + * getIDWithAttrMask - Determines the ID of an instruction, consuming
> + *   the ModR/M byte as appropriate for extended and escape opcodes,
> + *   and using a supplied attribute mask.
> + *
> + * @param instructionID - A pointer whose target is filled in with the ID of the
> + *                        instruction.  Left untouched on failure.
> + * @param insn          - The instruction whose ID is to be determined.
> + * @param attrMask      - The attribute mask to search.
> + * @return              - 0 if the ModR/M could be read when needed or was not
> + *                        needed; nonzero otherwise.
> + */
> +static int getIDWithAttrMask(uint16_t* instructionID,
> +                             struct InternalInstruction* insn,
> +                             uint8_t attrMask) {
> +  uint8_t instructionClass;
> +  uint8_t modRM = 0;  /* decode() accepts any value when ModR/M is unused */
> +
> +  instructionClass = contextForAttrs(attrMask);
> +
> +  if (modRMRequired(insn->opcodeType, instructionClass, insn->opcode)) {
> +    /*
> +     * Propagate a failed read instead of decoding with whatever happens to be
> +     * in insn->modRM; callers already test this function's return value.
> +     */
> +    if (readModRM(insn))
> +      return -1;
> +
> +    modRM = insn->modRM;
> +  }
> +
> +  *instructionID = decode(insn->opcodeType,
> +                          instructionClass,
> +                          insn->opcode,
> +                          modRM);
> +
> +  return 0;
> +}
> +
> +/*
> + * is16BitEquvalent (sic) - Determines whether two instruction names refer to
> + * equivalent instructions but one is 16-bit whereas the other is not.  The
> + * names must have the same length and may differ only where the original has
> + * Q or L and the equivalent has W, or where the digits of "64"/"32" in the
> + * original line up with the digits of "16" in the equivalent.
> + *
> + * @param orig  - The instruction that is not 16-bit
> + * @param equiv - The instruction that is 16-bit
> + * @return      - TRUE if the names are equivalent in that sense (identical
> + *                names included); FALSE otherwise.
> + */
> +static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
> +  /* size_t, not off_t: this is a string index and off_t is POSIX-only. */
> +  size_t i;
> +
> +  for(i = 0;; i++) {
> +    if(orig[i] == '\0' && equiv[i] == '\0')
> +      return TRUE;
> +    if(orig[i] == '\0' || equiv[i] == '\0')
> +      return FALSE;
> +    if(orig[i] != equiv[i]) {
> +      if((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
> +        continue;
> +      if((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
> +        continue;
> +      if((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
> +        continue;
> +      return FALSE;
> +    }
> +  }
> +}
> +
> +/*
> + * is64BitEquivalent - Determines whether two instruction names refer to
> + * equivalent instructions but one is 64-bit whereas the other is not.  The
> + * names must have the same length and may differ only where the original has
> + * W or L and the equivalent has Q, or where the digits of "16"/"32" in the
> + * original line up with the digits of "64" in the equivalent.
> + *
> + * @param orig  - The instruction that is not 64-bit
> + * @param equiv - The instruction that is 64-bit
> + * @return      - TRUE if the names are equivalent in that sense (identical
> + *                names included); FALSE otherwise.
> + */
> +static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
> +  /* size_t, not off_t: this is a string index and off_t is POSIX-only. */
> +  size_t i;
> +
> +  for(i = 0;; i++) {
> +    if(orig[i] == '\0' && equiv[i] == '\0')
> +      return TRUE;
> +    if(orig[i] == '\0' || equiv[i] == '\0')
> +      return FALSE;
> +    if(orig[i] != equiv[i]) {
> +      if((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
> +        continue;
> +      if((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
> +        continue;
> +      if((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
> +        continue;
> +      return FALSE;
> +    }
> +  }
> +}
> +
> +
> +/*
> + * getID - Determines the ID of an instruction, consuming the ModR/M byte as
> + *   appropriate for extended and escape opcodes.  Determines the attributes and
> + *   context for the instruction before doing so.  On success, stores the
> + *   result in insn->instructionID and insn->spec.
> + *
> + * @param insn  - The instruction whose ID is to be determined.
> + * @return      - 0 if the ModR/M could be read when needed or was not needed;
> + *                nonzero otherwise.
> + */
> +static int getID(struct InternalInstruction* insn) {
> +  uint8_t attrMask;
> +  uint16_t instructionID;
> +
> +  dprintf(insn, "getID()");
> +
> +  attrMask = ATTR_NONE;
> +
> +  if (insn->mode == MODE_64BIT)
> +    attrMask |= ATTR_64BIT;
> +
> +  if (insn->rexPrefix & 0x08)  /* bit 3 of the REX byte (REX.W) */
> +    attrMask |= ATTR_REXW;
> +
> +  /* Only a prefix at necessaryPrefixLocation counts; at most one is chosen. */
> +  if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
> +    attrMask |= ATTR_OPSIZE;
> +  else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
> +    attrMask |= ATTR_XS;
> +  else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
> +    attrMask |= ATTR_XD;
> +
> +  if(getIDWithAttrMask(&instructionID, insn, attrMask))
> +    return -1;
> +
> +  /* The following clauses compensate for limitations of the tables. */
> +
> +  if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
> +    /*
> +     * Although for SSE instructions it is usually necessary to treat REX.W+F2
> +     * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
> +     * an occasional instruction where F2 is incidental and REX.W is the more
> +     * significant.  If the decoded instruction is 32-bit and adding REX.W
> +     * instead of F2 changes a 32 to a 64, we adopt the new encoding.
> +     */
> +
> +    struct InstructionSpecifier* spec;
> +    uint16_t instructionIDWithREXw;
> +    struct InstructionSpecifier* specWithREXw;
> +
> +    spec = specifierForUID(instructionID);
> +
> +    if (getIDWithAttrMask(&instructionIDWithREXw,
> +                          insn,
> +                          attrMask & (~ATTR_XD))) {
> +      /*
> +       * Decoding with REX.w would yield nothing; give up and return original
> +       * decode.
> +       */
> +
> +      insn->instructionID = instructionID;
> +      insn->spec = spec;
> +      return 0;
> +    }
> +
> +    specWithREXw = specifierForUID(instructionIDWithREXw);
> +
> +    /* Prefer the second decode only if its name is the 64-bit twin. */
> +    if (is64BitEquivalent(spec->name, specWithREXw->name)) {
> +      insn->instructionID = instructionIDWithREXw;
> +      insn->spec = specWithREXw;
> +    } else {
> +      insn->instructionID = instructionID;
> +      insn->spec = spec;
> +    }
> +    return 0;
> +  }
> +
> +  if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
> +    /*
> +     * The instruction tables make no distinction between instructions that
> +     * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
> +     * particular spot (i.e., many MMX operations).  In general we're
> +     * conservative, but in the specific case where OpSize is present but not
> +     * in the right place we check if there's a 16-bit operation.
> +     */
> +
> +    struct InstructionSpecifier* spec;
> +    uint16_t instructionIDWithOpsize;
> +    struct InstructionSpecifier* specWithOpsize;
> +
> +    spec = specifierForUID(instructionID);
> +
> +    if (getIDWithAttrMask(&instructionIDWithOpsize,
> +                          insn,
> +                          attrMask | ATTR_OPSIZE)) {
> +      /*
> +       * ModRM required with OpSize but not present; give up and return version
> +       * without OpSize set
> +       */
> +
> +      insn->instructionID = instructionID;
> +      insn->spec = spec;
> +      return 0;
> +    }
> +
> +    specWithOpsize = specifierForUID(instructionIDWithOpsize);
> +
> +    /* Prefer the second decode only if its name is the 16-bit twin. */
> +    if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
> +      insn->instructionID = instructionIDWithOpsize;
> +      insn->spec = specWithOpsize;
> +    } else {
> +      insn->instructionID = instructionID;
> +      insn->spec = spec;
> +    }
> +    return 0;
> +  }
> +
> +  /* No table workaround applies; keep the first decode. */
> +  insn->instructionID = instructionID;
> +  insn->spec = specifierForUID(insn->instructionID);
> +
> +  return 0;
> +}
> +
> +/*
> + * readSIB - Consumes the SIB byte to determine addressing information for an
> + *   instruction.  Fills in sibIndex, sibScale and sibBase, and may override
> + *   eaDisplacement when the base field is 0b101.  A repeated call is a no-op.
> + *
> + * @param insn  - The instruction whose SIB byte is to be read.
> + * @return      - 0 if the SIB byte was successfully read (or had already been
> + *                read); nonzero otherwise.
> + */
> +static int readSIB(struct InternalInstruction* insn) {
> +  SIBIndex sibIndexBase;
> +  SIBBase sibBaseBase;
> +  uint8_t index, base;
> +
> +  dprintf(insn, "readSIB()");
> +
> +  if (insn->consumedSIB)
> +    return 0;
> +
> +  insn->consumedSIB = TRUE;
> +
> +  switch (insn->addressSize) {
> +  case 2:
> +    dprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
> +    return -1;
> +  case 4:
> +    sibIndexBase = SIB_INDEX_EAX;
> +    sibBaseBase = SIB_BASE_EAX;
> +    break;
> +  case 8:
> +    sibIndexBase = SIB_INDEX_RAX;
> +    sibBaseBase = SIB_BASE_RAX;
> +    break;
> +  default:
> +    /* Both register bases would be uninitialized below; fail instead. */
> +    dprintf(insn, "Unexpected address size");
> +    return -1;
> +  }
> +
> +  if (consumeByte(insn, &insn->sib))
> +    return -1;
> +
> +  /* REX.X supplies the fourth bit of the index register number. */
> +  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
> +
> +  switch (index) {
> +  case 0x4:
> +    insn->sibIndex = SIB_INDEX_NONE;
> +    break;
> +  default:
> +    /* The value is a SIB index, so cast to SIBIndex rather than EABase. */
> +    insn->sibIndex = (SIBIndex)(sibIndexBase + index);
> +    if (insn->sibIndex == SIB_INDEX_sib ||
> +        insn->sibIndex == SIB_INDEX_sib64)
> +      insn->sibIndex = SIB_INDEX_NONE;
> +    break;
> +  }
> +
> +  switch (scaleFromSIB(insn->sib)) {
> +  case 0:
> +    insn->sibScale = 1;
> +    break;
> +  case 1:
> +    insn->sibScale = 2;
> +    break;
> +  case 2:
> +    insn->sibScale = 4;
> +    break;
> +  case 3:
> +    insn->sibScale = 8;
> +    break;
> +  }
> +
> +  /* REX.B supplies the fourth bit of the base register number. */
> +  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
> +
> +  switch (base) {
> +  case 0x5:
> +    /* Base 0b101: the meaning depends on the mod field of the ModR/M byte. */
> +    switch (modFromModRM(insn->modRM)) {
> +    case 0x0:
> +      insn->eaDisplacement = EA_DISP_32;
> +      insn->sibBase = SIB_BASE_NONE;
> +      break;
> +    case 0x1:
> +      insn->eaDisplacement = EA_DISP_8;
> +      insn->sibBase = (insn->addressSize == 4 ?
> +                       SIB_BASE_EBP : SIB_BASE_RBP);
> +      break;
> +    case 0x2:
> +      insn->eaDisplacement = EA_DISP_32;
> +      insn->sibBase = (insn->addressSize == 4 ?
> +                       SIB_BASE_EBP : SIB_BASE_RBP);
> +      break;
> +    case 0x3:
> +      unreachable("Cannot have Mod = 0b11 and a SIB byte");
> +    }
> +    break;
> +  default:
> +    /* The value is a SIB base, so cast to SIBBase rather than EABase. */
> +    insn->sibBase = (SIBBase)(sibBaseBase + base);
> +    break;
> +  }
> +
> +  return 0;
> +}
> +
> +/*
> + * readDisplacement - Consumes the displacement of an instruction, using the
> + *   width recorded in insn->eaDisplacement, and stores the value in
> + *   insn->displacement.  A repeated call is a no-op.
> + *
> + * @param insn  - The instruction whose displacement is to be read.
> + * @return      - 0 if the displacement byte was successfully read; nonzero
> + *                otherwise.
> + */
> +static int readDisplacement(struct InternalInstruction* insn) {
> +  dprintf(insn, "readDisplacement()");
> +
> +  if (insn->consumedDisplacement)
> +    return 0;
> +
> +  insn->consumedDisplacement = TRUE;
> +
> +  switch (insn->eaDisplacement) {
> +  case EA_DISP_NONE:
> +    /* This flag is set back to TRUE after the switch. */
> +    insn->consumedDisplacement = FALSE;
> +    break;
> +  case EA_DISP_8: {
> +    int8_t disp8;
> +
> +    if (consumeInt8(insn, &disp8))
> +      return -1;
> +    insn->displacement = disp8;
> +    break;
> +  }
> +  case EA_DISP_16: {
> +    int16_t disp16;
> +
> +    if (consumeInt16(insn, &disp16))
> +      return -1;
> +    insn->displacement = disp16;
> +    break;
> +  }
> +  case EA_DISP_32: {
> +    int32_t disp32;
> +
> +    if (consumeInt32(insn, &disp32))
> +      return -1;
> +    insn->displacement = disp32;
> +    break;
> +  }
> +  }
> +
> +  insn->consumedDisplacement = TRUE;
> +  return 0;
> +}
> +
> +/*
> + * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
> + *   displacement) for an instruction and interprets it.  A repeated call
> + *   after a successful ModR/M read is a no-op.
> + *
> + * @param insn  - The instruction whose addressing information is to be read.
> + * @return      - 0 if the information was successfully read; nonzero otherwise.
> + */
> +static int readModRM(struct InternalInstruction* insn) {
> +  uint8_t mod, rm, reg;
> +
> +  dprintf(insn, "readModRM()");
> +
> +  if (insn->consumedModRM)
> +    return 0;
> +
> +  /* Fail on a short read instead of interpreting a stale insn->modRM. */
> +  if (consumeByte(insn, &insn->modRM))
> +    return -1;
> +  insn->consumedModRM = TRUE;
> +
> +  mod     = modFromModRM(insn->modRM);
> +  rm      = rmFromModRM(insn->modRM);
> +  reg     = regFromModRM(insn->modRM);
> +
> +  /*
> +   * This goes by insn->registerSize to pick the correct register, which messes
> +   * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
> +   * fixupReg().
> +   */
> +  switch (insn->registerSize) {
> +  case 2:
> +    insn->regBase = REG_AX;
> +    insn->eaRegBase = EA_REG_AX;
> +    break;
> +  case 4:
> +    insn->regBase = REG_EAX;
> +    insn->eaRegBase = EA_REG_EAX;
> +    break;
> +  case 8:
> +    insn->regBase = REG_RAX;
> +    insn->eaRegBase = EA_REG_RAX;
> +    break;
> +  }
> +
> +  /* REX.R and REX.B supply the fourth bit of reg and rm respectively. */
> +  reg |= rFromREX(insn->rexPrefix) << 3;
> +  rm  |= bFromREX(insn->rexPrefix) << 3;
> +
> +  insn->reg = (Reg)(insn->regBase + reg);
> +
> +  switch (insn->addressSize) {
> +  case 2:
> +    insn->eaBaseBase = EA_BASE_BX_SI;
> +
> +    switch (mod) {
> +    case 0x0:
> +      if (rm == 0x6) {
> +        insn->eaBase = EA_BASE_NONE;
> +        insn->eaDisplacement = EA_DISP_16;
> +        if(readDisplacement(insn))
> +          return -1;
> +      } else {
> +        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
> +        insn->eaDisplacement = EA_DISP_NONE;
> +      }
> +      break;
> +    case 0x1:
> +      insn->eaBase = (EABase)(insn->eaBaseBase + rm);
> +      insn->eaDisplacement = EA_DISP_8;
> +      if(readDisplacement(insn))
> +        return -1;
> +      break;
> +    case 0x2:
> +      insn->eaBase = (EABase)(insn->eaBaseBase + rm);
> +      insn->eaDisplacement = EA_DISP_16;
> +      if(readDisplacement(insn))
> +        return -1;
> +      break;
> +    case 0x3:
> +      /*
> +       * NOTE(review): eaDisplacement is not assigned on this path, so the
> +       * read below uses whatever value it already holds -- confirm intended.
> +       */
> +      insn->eaBase = (EABase)(insn->eaRegBase + rm);
> +      if(readDisplacement(insn))
> +        return -1;
> +      break;
> +    }
> +    break;
> +  case 4:
> +  case 8:
> +    insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
> +
> +    switch (mod) {
> +    case 0x0:
> +      insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
> +      switch (rm) {
> +      case 0x4:
> +      case 0xc:   /* in case REXW.b is set */
> +        insn->eaBase = (insn->addressSize == 4 ?
> +                        EA_BASE_sib : EA_BASE_sib64);
> +        /* A failed SIB read must fail the whole decode. */
> +        if(readSIB(insn) || readDisplacement(insn))
> +          return -1;
> +        break;
> +      case 0x5:
> +        insn->eaBase = EA_BASE_NONE;
> +        insn->eaDisplacement = EA_DISP_32;
> +        if(readDisplacement(insn))
> +          return -1;
> +        break;
> +      default:
> +        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
> +        break;
> +      }
> +      break;
> +    case 0x1:
> +    case 0x2:
> +      insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
> +      switch (rm) {
> +      case 0x4:
> +      case 0xc:   /* in case REXW.b is set */
> +        /*
> +         * NOTE(review): unlike the mod == 0 case, this always uses
> +         * EA_BASE_sib, even for 8-byte addresses -- confirm intended.
> +         */
> +        insn->eaBase = EA_BASE_sib;
> +        /* A failed SIB read must fail the whole decode. */
> +        if(readSIB(insn) || readDisplacement(insn))
> +          return -1;
> +        break;
> +      default:
> +        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
> +        if(readDisplacement(insn))
> +          return -1;
> +        break;
> +      }
> +      break;
> +    case 0x3:
> +      /* Register-direct operand: no memory base and no displacement. */
> +      insn->eaDisplacement = EA_DISP_NONE;
> +      insn->eaBase = (EABase)(insn->eaRegBase + rm);
> +      break;
> +    }
> +    break;
> +  } /* switch (insn->addressSize) */
> +
> +  return 0;
> +}
> +
> +#define GENERIC_FIXUP_FUNC(name, base, prefix)            \
> +  static uint8_t name(struct InternalInstruction *insn,   \
> +                      OperandType type,                   \
> +                      uint8_t index,                      \
> +                      uint8_t *valid) {                   \
> +    *valid = 1; /* cleared below if a range check fails */ \
> +    switch (type) {                                       \
> +    default:                                              \
> +      unreachable("Unhandled register type");             \
> +    case TYPE_Rv: /* keep the width readModRM() chose */  \
> +      return base + index;                                \
> +    case TYPE_R8: /* with REX, 4-7 select SPL..DIL */     \
> +      if(insn->rexPrefix &&                               \
> +         index >= 4 && index <= 7) {                      \
> +        return prefix##_SPL + (index - 4);                \
> +      } else {                                            \
> +        return prefix##_AL + index;                       \
> +      }                                                   \
> +    case TYPE_R16:                                        \
> +      return prefix##_AX + index;                         \
> +    case TYPE_R32:                                        \
> +      return prefix##_EAX + index;                        \
> +    case TYPE_R64:                                        \
> +      return prefix##_RAX + index;                        \
> +    case TYPE_XMM128:                                     \
> +    case TYPE_XMM64:                                      \
> +    case TYPE_XMM32:                                      \
> +    case TYPE_XMM:                                        \
> +      return prefix##_XMM0 + index;                       \
> +    case TYPE_MM64:                                       \
> +    case TYPE_MM32:                                       \
> +    case TYPE_MM:                                         \
> +      if(index > 7)                                       \
> +        *valid = 0;                                       \
> +      return prefix##_MM0 + index;                        \
> +    case TYPE_SEGMENTREG:                                 \
> +      if(index > 5)                                       \
> +        *valid = 0;                                       \
> +      return prefix##_ES + index;                         \
> +    case TYPE_DEBUGREG:                                   \
> +      if(index > 7)                                       \
> +        *valid = 0;                                       \
> +      return prefix##_DR0 + index;                        \
> +    case TYPE_CR32:                                       \
> +      if(index > 7)                                       \
> +        *valid = 0;                                       \
> +      return prefix##_ECR0 + index;                       \
> +    case TYPE_CR64:                                       \
> +      if(index > 8)                                       \
> +        *valid = 0;                                       \
> +      return prefix##_RCR0 + index;                       \
> +    }                                                     \
> +  }
> +
> +/*
> + * fixup*Value - Consults an operand type to determine the meaning of the
> + *   reg or R/M field.  If the operand is an XMM operand, for example, an
> + *   operand would be XMM0 instead of AX, which readModRM() would otherwise
> + *   misinterpret it as.
> + *
> + *   Both functions are generated from GENERIC_FIXUP_FUNC: fixupRegValue
> + *   produces REG_* enumerators and fixupRMValue produces EA_REG_*
> + *   enumerators.  For TYPE_Rv the result is (base + index), where base is
> + *   the register base that readModRM() picked from the instruction's
> + *   register size; every other type offsets index from the first register
> + *   of the class named by the type.
> + *
> + * @param insn  - The instruction containing the operand.
> + * @param type  - The operand type.
> + * @param index - The existing value of the field as reported by readModRM().
> + * @param valid - The address of a uint8_t.  The target is set to 1 if the
> + *                field is valid for the register class; 0 if not.
> + * @return      - The enumerator for the selected register, narrowed to
> + *                uint8_t.  A value is returned even when *valid is set to
> + *                0, so callers need to check *valid.
> + */
> +GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    REG)
> +GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
> +
> +/*
> + * fixupReg - Corrects readModRM()'s interpretation of a register operand by
> + *   dispatching, on the operand's encoding, to the matching fixup*Value
> + *   function: the reg field goes through fixupRegValue and the R/M field
> + *   goes through fixupRMValue.
> + *
> + * @param insn  - See fixup*Value().
> + * @param op    - The operand specifier.
> + * @return      - 0 if fixup was successful; -1 if the register returned was
> + *                invalid for its class.
> + */
> +static int fixupReg(struct InternalInstruction *insn,
> +                    struct OperandSpecifier *op) {
> +  uint8_t isValid;
> +
> +  dprintf(insn, "fixupReg()");
> +
> +  switch ((OperandEncoding)op->encoding) {
> +  default:
> +    unreachable("Expected a REG or R/M encoding in fixupReg");
> +  case ENCODING_REG:
> +    insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,
> +                                   insn->reg - insn->regBase, &isValid);
> +    return isValid ? 0 : -1;
> +  case ENCODING_RM:
> +    /* Effective-address bases sort below eaRegBase and are left untouched */
> +    if (insn->eaBase < insn->eaRegBase)
> +      return 0;
> +    insn->eaBase = (EABase)fixupRMValue(insn, (OperandType)op->type,
> +                                        insn->eaBase - insn->eaRegBase,
> +                                        &isValid);
> +    return isValid ? 0 : -1;
> +  }
> +
> +  return 0;
> +}
> +
> +/*
> + * readOpcodeModifier - Reads an operand from the opcode field of an
> + *   instruction.  Handles AddRegFrm instructions.
> + *
> + *   The modifier is the distance between a byte of the instruction and the
> + *   specifier's modifierBase; the specifier's modifierType selects whether
> + *   that byte is the opcode or the ModR/M byte.  The work is done at most
> + *   once per instruction: later calls return immediately.
> + *
> + * @param insn    - The instruction whose opcode field is to be read.
> + */
> +static void readOpcodeModifier(struct InternalInstruction* insn) {
> +  dprintf(insn, "readOpcodeModifier()");
> +
> +  if (!insn->consumedOpcodeModifier) {
> +    struct InstructionSpecifier* spec = insn->spec;
> +
> +    insn->consumedOpcodeModifier = TRUE;
> +
> +    switch(spec->modifierType) {
> +    default:
> +      unreachable("Unknown modifier type.");
> +    case MODIFIER_NONE:
> +      unreachable("No modifier but an operand expects one.");
> +    case MODIFIER_OPCODE:
> +      insn->opcodeModifier = insn->opcode - spec->modifierBase;
> +      break;
> +    case MODIFIER_MODRM:
> +      insn->opcodeModifier = insn->modRM - spec->modifierBase;
> +      break;
> +    }
> +  }
> +}
> +
> +/*
> + * readOpcodeRegister - Reads an operand from the opcode field of an
> + *   instruction and interprets it appropriately given the operand width.
> + *   Handles AddRegFrm instructions.
> + *
> + *   The register number is the opcode modifier with REX.B supplying bit 3.
> + *   For byte registers, numbers 4-7 name SPL..DIL when a REX prefix is
> + *   present.
> + *
> + * @param insn  - See readOpcodeModifier().
> + * @param size  - The width (in bytes) of the register being specified.
> + *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
> + *                RAX.  0 means use the instruction's registerSize.
> + */
> +static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
> +  int regNumber;
> +
> +  dprintf(insn, "readOpcodeRegister()");
> +
> +  readOpcodeModifier(insn);
> +
> +  if (size == 0)
> +    size = insn->registerSize;
> +
> +  /* Must follow readOpcodeModifier(), which fills in opcodeModifier */
> +  regNumber = (bFromREX(insn->rexPrefix) << 3) | insn->opcodeModifier;
> +
> +  switch (size) {
> +  case 1:
> +    if (insn->rexPrefix && regNumber >= 0x4 && regNumber < 0x8)
> +      insn->opcodeRegister = (Reg)(REG_SPL + (regNumber - 4));
> +    else
> +      insn->opcodeRegister = (Reg)(REG_AL + regNumber);
> +    break;
> +  case 2:
> +    insn->opcodeRegister = (Reg)(REG_AX + regNumber);
> +    break;
> +  case 4:
> +    insn->opcodeRegister = (Reg)(REG_EAX + regNumber);
> +    break;
> +  case 8:
> +    insn->opcodeRegister = (Reg)(REG_RAX + regNumber);
> +    break;
> +  }
> +}
> +
> +/*
> + * readImmediate - Consumes an immediate operand from an instruction, given the
> + *   desired operand size, and stores it in the next free slot of
> + *   insn->immediates.
> + *
> + * @param insn  - The instruction whose operand is to be read.
> + * @param size  - The width (in bytes) of the operand.  A nonzero size is also
> + *                recorded as the instruction's immediateSize; 0 means use
> + *                the immediateSize already recorded.
> + * @return      - 0 if the immediate was successfully consumed; nonzero
> + *                otherwise.
> + */
> +static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
> +  dprintf(insn, "readImmediate()");
> +
> +  if (insn->numImmediatesConsumed == 2)
> +    unreachable("Already consumed two immediates");
> +
> +  if (size != 0)
> +    insn->immediateSize = size;
> +  else
> +    size = insn->immediateSize;
> +
> +  switch (size) {
> +  case 1: {
> +    uint8_t value8;
> +    if (consumeByte(insn, &value8))
> +      return -1;
> +    insn->immediates[insn->numImmediatesConsumed] = value8;
> +    break;
> +  }
> +  case 2: {
> +    uint16_t value16;
> +    if (consumeUInt16(insn, &value16))
> +      return -1;
> +    insn->immediates[insn->numImmediatesConsumed] = value16;
> +    break;
> +  }
> +  case 4: {
> +    uint32_t value32;
> +    if (consumeUInt32(insn, &value32))
> +      return -1;
> +    insn->immediates[insn->numImmediatesConsumed] = value32;
> +    break;
> +  }
> +  case 8: {
> +    uint64_t value64;
> +    if (consumeUInt64(insn, &value64))
> +      return -1;
> +    insn->immediates[insn->numImmediatesConsumed] = value64;
> +    break;
> +  }
> +  }
> +
> +  insn->numImmediatesConsumed++;
> +
> +  return 0;
> +}
> +
> +/*
> + * readOperands - Consults the specifier for an instruction and consumes all
> + *   operands for that instruction, interpreting them as it goes.
> + *
> + *   All X86_MAX_OPERANDS slots of the specifier are visited; slots whose
> + *   encoding is ENCODING_NONE or ENCODING_DUP consume nothing.  Decoding
> + *   stops at the first operand that cannot be read or whose encoding is not
> + *   supported.
> + *
> + * @param insn  - The instruction whose operands are to be read and interpreted.
> + * @return      - 0 if all operands could be read; nonzero otherwise.
> + */
> +static int readOperands(struct InternalInstruction* insn) {
> +  int index;
> +
> +  dprintf(insn, "readOperands()");
> +
> +  for (index = 0; index < X86_MAX_OPERANDS; ++index) {
> +    switch (insn->spec->operands[index].encoding) {
> +    case ENCODING_NONE:
> +      break;
> +    case ENCODING_REG:
> +    case ENCODING_RM:
> +      if (readModRM(insn))
> +        return -1;
> +      if (fixupReg(insn, &insn->spec->operands[index]))
> +        return -1;
> +      break;
> +    case ENCODING_CB:
> +    case ENCODING_CW:
> +    case ENCODING_CD:
> +    case ENCODING_CP:
> +    case ENCODING_CO:
> +    case ENCODING_CT:
> +      dprintf(insn, "We currently don't hande code-offset encodings");
> +      return -1;
> +    case ENCODING_IB:
> +      if (readImmediate(insn, 1))
> +        return -1;
> +      break;
> +    case ENCODING_IW:
> +      if (readImmediate(insn, 2))
> +        return -1;
> +      break;
> +    case ENCODING_ID:
> +      if (readImmediate(insn, 4))
> +        return -1;
> +      break;
> +    case ENCODING_IO:
> +      if (readImmediate(insn, 8))
> +        return -1;
> +      break;
> +    case ENCODING_Iv:
> +      /* Propagate read failures, exactly as the fixed-width cases do */
> +      if (readImmediate(insn, insn->immediateSize))
> +        return -1;
> +      break;
> +    case ENCODING_Ia:
> +      if (readImmediate(insn, insn->addressSize))
> +        return -1;
> +      break;
> +    case ENCODING_RB:
> +      readOpcodeRegister(insn, 1);
> +      break;
> +    case ENCODING_RW:
> +      readOpcodeRegister(insn, 2);
> +      break;
> +    case ENCODING_RD:
> +      readOpcodeRegister(insn, 4);
> +      break;
> +    case ENCODING_RO:
> +      readOpcodeRegister(insn, 8);
> +      break;
> +    case ENCODING_Rv:
> +      /* Size 0 asks readOpcodeRegister() to use the instruction's width */
> +      readOpcodeRegister(insn, 0);
> +      break;
> +    case ENCODING_I:
> +      readOpcodeModifier(insn);
> +      break;
> +    case ENCODING_DUP:
> +      break;
> +    default:
> +      dprintf(insn, "Encountered an operand with an unknown encoding.");
> +      return -1;
> +    }
> +  }
> +
> +  return 0;
> +}
> +
> +/*
> + * decodeInstruction - Reads and interprets a full instruction provided by the
> + *   user.
> + *
> + *   The instruction buffer is zeroed first, then prefixes, opcode,
> + *   instruction ID and operands are decoded in that order.  An instruction
> + *   longer than 15 bytes is only reported through the logger; it is not
> + *   treated as a failure.
> + *
> + * @param insn      - A pointer to the instruction to be populated.  Must be
> + *                    pre-allocated.
> + * @param reader    - The function to be used to read the instruction's bytes.
> + * @param readerArg - A generic argument to be passed to the reader to store
> + *                    any internal state.
> + * @param logger    - If non-NULL, the function to be used to write log messages
> + *                    and warnings.
> + * @param loggerArg - A generic argument to be passed to the logger to store
> + *                    any internal state.
> + * @param startLoc  - The address (in the reader's address space) of the first
> + *                    byte in the instruction.
> + * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
> + *                    decode the instruction in.
> + * @return          - 0 if the instruction was decoded; -1 if any decoding
> + *                    stage failed or the instruction ID came back as 0.
> + */
> +int decodeInstruction(struct InternalInstruction* insn,
> +                      byteReader_t reader,
> +                      void* readerArg,
> +                      dlog_t logger,
> +                      void* loggerArg,
> +                      uint64_t startLoc,
> +                      DisassemblerMode mode) {
> +  bzero(insn, sizeof(struct InternalInstruction));
> +
> +  insn->reader = reader;
> +  insn->readerArg = readerArg;
> +  insn->dlog = logger;
> +  insn->dlogArg = loggerArg;
> +  insn->startLocation = startLoc;
> +  insn->readerCursor = startLoc;
> +  insn->mode = mode;
> +  insn->numImmediatesConsumed = 0;
> +
> +  if (readPrefixes(insn)       ||
> +      readOpcode(insn)         ||
> +      getID(insn)              ||
> +      insn->instructionID == 0 ||
> +      readOperands(insn))
> +    return -1;
> +
> +  insn->length = insn->readerCursor - insn->startLocation;
> +
> +  /*
> +   * uint64_t and size_t are not guaranteed to be unsigned long long, so cast
> +   * each argument to the type the %ll conversions expect.
> +   */
> +  dprintf(insn, "Read from 0x%llx to 0x%llx: length %llu",
> +          (unsigned long long)startLoc,
> +          (unsigned long long)insn->readerCursor,
> +          (unsigned long long)insn->length);
> +
> +  if (insn->length > 15)
> +    dprintf(insn, "Instruction exceeds 15-byte limit");
> +
> +  return 0;
> +}
>
> Added: llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h?rev=91749&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h (added)
> +++ llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h Fri Dec 18 20:59:52 2009
> @@ -0,0 +1,515 @@
> +/*===- X86DisassemblerDecoder.h - Disassembler decoder -------------*- C -*-==*
> + *
> + *                     The LLVM Compiler Infrastructure
> + *
> + * This file is distributed under the University of Illinois Open Source
> + * License. See LICENSE.TXT for details.
> + *
> + *===----------------------------------------------------------------------===*
> + *
> + * This file is part of the X86 Disassembler.
> + * It contains the public interface of the instruction decoder.
> + * Documentation for the disassembler can be found in X86Disassembler.h.
> + *
> + *===----------------------------------------------------------------------===*/
> +
> +#ifndef X86DISASSEMBLERDECODER_H
> +#define X86DISASSEMBLERDECODER_H
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#define INSTRUCTION_SPECIFIER_FIELDS  \
> +  const char*             name;
> +
> +#define INSTRUCTION_IDS     \
> +  InstrUID*  instructionIDs;
> +
> +#include "X86DisassemblerDecoderCommon.h"
> +
> +#undef INSTRUCTION_SPECIFIER_FIELDS
> +#undef INSTRUCTION_IDS
> +
> +/*
> + * Bit-field extractors for the ModR/M, SIB and REX bytes of an Intel
> + * instruction.  Each one returns its field shifted down to bit 0.
> + */
> +static inline uint8_t modFromModRM(uint8_t modRM){ return (modRM >> 6) & 0x3; }
> +static inline uint8_t regFromModRM(uint8_t modRM){ return (modRM >> 3) & 0x7; }
> +static inline uint8_t rmFromModRM(uint8_t modRM) { return modRM & 0x7;        }
> +static inline uint8_t scaleFromSIB(uint8_t sib)  { return (sib >> 6) & 0x3;   }
> +static inline uint8_t indexFromSIB(uint8_t sib)  { return (sib >> 3) & 0x7;   }
> +static inline uint8_t baseFromSIB(uint8_t sib)   { return sib & 0x7;          }
> +static inline uint8_t wFromREX(uint8_t rex)      { return (rex >> 3) & 0x1;   }
> +static inline uint8_t rFromREX(uint8_t rex)      { return (rex >> 2) & 0x1;   }
> +static inline uint8_t xFromREX(uint8_t rex)      { return (rex >> 1) & 0x1;   }
> +static inline uint8_t bFromREX(uint8_t rex)      { return rex & 0x1;          }
> +
> +/*
> + * These enums represent Intel registers for use by the decoder.
> + *
> + * Each table below is a list of ENTRY(name) items.  The enum definitions
> + * later in this file define ENTRY to paste a prefix onto each name and then
> + * expand these tables, so the order of the entries fixes the numeric value
> + * of every enumerator.  The decoder selects registers by adding an index to
> + * the first enumerator of a table, so entries must not be reordered.
> + */
> +
> +#define REGS_8BIT     \
> +  ENTRY(AL)           \
> +  ENTRY(CL)           \
> +  ENTRY(DL)           \
> +  ENTRY(BL)           \
> +  ENTRY(AH)           \
> +  ENTRY(CH)           \
> +  ENTRY(DH)           \
> +  ENTRY(BH)           \
> +  ENTRY(R8B)          \
> +  ENTRY(R9B)          \
> +  ENTRY(R10B)         \
> +  ENTRY(R11B)         \
> +  ENTRY(R12B)         \
> +  ENTRY(R13B)         \
> +  ENTRY(R14B)         \
> +  ENTRY(R15B)         \
> +  ENTRY(SPL) /* SPL..DIL replace indices 4-7 under REX */ \
> +  ENTRY(BPL)          \
> +  ENTRY(SIL)          \
> +  ENTRY(DIL)
> +
> +#define EA_BASES_16BIT /* entry n is picked by R/M value n */ \
> +  ENTRY(BX_SI)          \
> +  ENTRY(BX_DI)          \
> +  ENTRY(BP_SI)          \
> +  ENTRY(BP_DI)          \
> +  ENTRY(SI)             \
> +  ENTRY(DI)             \
> +  ENTRY(BP)             \
> +  ENTRY(BX)             \
> +  ENTRY(R8W)            \
> +  ENTRY(R9W)            \
> +  ENTRY(R10W)           \
> +  ENTRY(R11W)           \
> +  ENTRY(R12W)           \
> +  ENTRY(R13W)           \
> +  ENTRY(R14W)           \
> +  ENTRY(R15W)
> +
> +#define REGS_16BIT    \
> +  ENTRY(AX)           \
> +  ENTRY(CX)           \
> +  ENTRY(DX)           \
> +  ENTRY(BX)           \
> +  ENTRY(SP)           \
> +  ENTRY(BP)           \
> +  ENTRY(SI)           \
> +  ENTRY(DI)           \
> +  ENTRY(R8W)          \
> +  ENTRY(R9W)          \
> +  ENTRY(R10W)         \
> +  ENTRY(R11W)         \
> +  ENTRY(R12W)         \
> +  ENTRY(R13W)         \
> +  ENTRY(R14W)         \
> +  ENTRY(R15W)
> +
> +#define EA_BASES_32BIT  \
> +  ENTRY(EAX)            \
> +  ENTRY(ECX)            \
> +  ENTRY(EDX)            \
> +  ENTRY(EBX)            \
> +  ENTRY(sib) /* placeholder: the base comes from a SIB byte */ \
> +  ENTRY(EBP)            \
> +  ENTRY(ESI)            \
> +  ENTRY(EDI)            \
> +  ENTRY(R8D)            \
> +  ENTRY(R9D)            \
> +  ENTRY(R10D)           \
> +  ENTRY(R11D)           \
> +  ENTRY(R12D)           \
> +  ENTRY(R13D)           \
> +  ENTRY(R14D)           \
> +  ENTRY(R15D)
> +
> +#define REGS_32BIT  \
> +  ENTRY(EAX)        \
> +  ENTRY(ECX)        \
> +  ENTRY(EDX)        \
> +  ENTRY(EBX)        \
> +  ENTRY(ESP)        \
> +  ENTRY(EBP)        \
> +  ENTRY(ESI)        \
> +  ENTRY(EDI)        \
> +  ENTRY(R8D)        \
> +  ENTRY(R9D)        \
> +  ENTRY(R10D)       \
> +  ENTRY(R11D)       \
> +  ENTRY(R12D)       \
> +  ENTRY(R13D)       \
> +  ENTRY(R14D)       \
> +  ENTRY(R15D)
> +
> +#define EA_BASES_64BIT  \
> +  ENTRY(RAX)            \
> +  ENTRY(RCX)            \
> +  ENTRY(RDX)            \
> +  ENTRY(RBX)            \
> +  ENTRY(sib64) /* 64-bit counterpart of the sib placeholder */ \
> +  ENTRY(RBP)            \
> +  ENTRY(RSI)            \
> +  ENTRY(RDI)            \
> +  ENTRY(R8)             \
> +  ENTRY(R9)             \
> +  ENTRY(R10)            \
> +  ENTRY(R11)            \
> +  ENTRY(R12)            \
> +  ENTRY(R13)            \
> +  ENTRY(R14)            \
> +  ENTRY(R15)
> +
> +#define REGS_64BIT  \
> +  ENTRY(RAX)        \
> +  ENTRY(RCX)        \
> +  ENTRY(RDX)        \
> +  ENTRY(RBX)        \
> +  ENTRY(RSP)        \
> +  ENTRY(RBP)        \
> +  ENTRY(RSI)        \
> +  ENTRY(RDI)        \
> +  ENTRY(R8)         \
> +  ENTRY(R9)         \
> +  ENTRY(R10)        \
> +  ENTRY(R11)        \
> +  ENTRY(R12)        \
> +  ENTRY(R13)        \
> +  ENTRY(R14)        \
> +  ENTRY(R15)
> +
> +#define REGS_MMX  \
> +  ENTRY(MM0)      \
> +  ENTRY(MM1)      \
> +  ENTRY(MM2)      \
> +  ENTRY(MM3)      \
> +  ENTRY(MM4)      \
> +  ENTRY(MM5)      \
> +  ENTRY(MM6)      \
> +  ENTRY(MM7)
> +
> +#define REGS_XMM  \
> +  ENTRY(XMM0)     \
> +  ENTRY(XMM1)     \
> +  ENTRY(XMM2)     \
> +  ENTRY(XMM3)     \
> +  ENTRY(XMM4)     \
> +  ENTRY(XMM5)     \
> +  ENTRY(XMM6)     \
> +  ENTRY(XMM7)     \
> +  ENTRY(XMM8)     \
> +  ENTRY(XMM9)     \
> +  ENTRY(XMM10)    \
> +  ENTRY(XMM11)    \
> +  ENTRY(XMM12)    \
> +  ENTRY(XMM13)    \
> +  ENTRY(XMM14)    \
> +  ENTRY(XMM15)
> +
> +#define REGS_SEGMENT \
> +  ENTRY(ES)          \
> +  ENTRY(CS)          \
> +  ENTRY(SS)          \
> +  ENTRY(DS)          \
> +  ENTRY(FS)          \
> +  ENTRY(GS)
> +
> +#define REGS_DEBUG  \
> +  ENTRY(DR0)        \
> +  ENTRY(DR1)        \
> +  ENTRY(DR2)        \
> +  ENTRY(DR3)        \
> +  ENTRY(DR4)        \
> +  ENTRY(DR5)        \
> +  ENTRY(DR6)        \
> +  ENTRY(DR7)
> +
> +#define REGS_CONTROL_32BIT  \
> +  ENTRY(ECR0)               \
> +  ENTRY(ECR1)               \
> +  ENTRY(ECR2)               \
> +  ENTRY(ECR3)               \
> +  ENTRY(ECR4)               \
> +  ENTRY(ECR5)               \
> +  ENTRY(ECR6)               \
> +  ENTRY(ECR7)
> +
> +#define REGS_CONTROL_64BIT  \
> +  ENTRY(RCR0)               \
> +  ENTRY(RCR1)               \
> +  ENTRY(RCR2)               \
> +  ENTRY(RCR3)               \
> +  ENTRY(RCR4)               \
> +  ENTRY(RCR5)               \
> +  ENTRY(RCR6)               \
> +  ENTRY(RCR7)               \
> +  ENTRY(RCR8) /* ninth entry: the 64-bit table has one more than ECR */
> +
> +#define ALL_EA_BASES  \
> +  EA_BASES_16BIT      \
> +  EA_BASES_32BIT      \
> +  EA_BASES_64BIT
> +
> +#define ALL_SIB_BASES \
> +  REGS_32BIT          \
> +  REGS_64BIT
> +
> +#define ALL_REGS /* expanded for both the Reg and EABase enums */ \
> +  REGS_8BIT           \
> +  REGS_16BIT          \
> +  REGS_32BIT          \
> +  REGS_64BIT          \
> +  REGS_MMX            \
> +  REGS_XMM            \
> +  REGS_SEGMENT        \
> +  REGS_DEBUG          \
> +  REGS_CONTROL_32BIT  \
> +  REGS_CONTROL_64BIT  \
> +  ENTRY(RIP)
> +
> +/*
> + * EABase - All possible values of the base field for effective-address
> + *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We
> + *   distinguish between bases (EA_BASE_*) and registers that just happen to be
> + *   referred to when Mod == 0b11 (EA_REG_*).
> + *
> + *   Layout: EA_BASE_NONE, then every EA_BASE_* from ALL_EA_BASES, then every
> + *   EA_REG_* from ALL_REGS, then EA_max.  Because all EA_REG_* values come
> + *   after all EA_BASE_* values, a comparison against an EA_REG_* value is
> + *   enough to tell a register from a memory base.
> + */
> +typedef enum {
> +  EA_BASE_NONE,
> +#define ENTRY(x) EA_BASE_##x,
> +  ALL_EA_BASES
> +#undef ENTRY
> +#define ENTRY(x) EA_REG_##x,
> +  ALL_REGS
> +#undef ENTRY
> +  EA_max
> +} EABase;
> +
> +/*
> + * SIBIndex - All possible values of the SIB index field.
> + *   Borrows entries from ALL_EA_BASES with the special case that
> + *   sib is synonymous with NONE.
> + *   Since the whole of ALL_EA_BASES is expanded, the 16-bit entries and the
> + *   sib64 entry are present here as well.
> + */
> +typedef enum {
> +  SIB_INDEX_NONE,
> +#define ENTRY(x) SIB_INDEX_##x,
> +  ALL_EA_BASES
> +#undef ENTRY
> +  SIB_INDEX_max
> +} SIBIndex;
> +
> +/*
> + * SIBBase - All possible values of the SIB base field.
> + *   Built from ALL_SIB_BASES, i.e. the 32-bit and 64-bit general-purpose
> + *   register tables.
> + */
> +typedef enum {
> +  SIB_BASE_NONE,
> +#define ENTRY(x) SIB_BASE_##x,
> +  ALL_SIB_BASES
> +#undef ENTRY
> +  SIB_BASE_max
> +} SIBBase;
> +
> +/*
> + * EADisplacement - Possible displacement types for effective-address
> + *   computations.
> + */
> +typedef enum {
> +  EA_DISP_NONE,
> +  EA_DISP_8,
> +  EA_DISP_16,
> +  EA_DISP_32
> +} EADisplacement;
> +
> +/*
> + * Reg - All possible values of the reg field in the ModR/M byte.
> + *   Expanded from ALL_REGS with no leading NONE entry, so the first register
> + *   of ALL_REGS has the value 0 and REG_max is the number of registers.
> + */
> +typedef enum {
> +#define ENTRY(x) REG_##x,
> +  ALL_REGS
> +#undef ENTRY
> +  REG_max
> +} Reg;
> +
> +/*
> + * SegmentOverride - All possible segment overrides.
> + */
> +typedef enum {
> +  SEG_OVERRIDE_NONE,
> +  SEG_OVERRIDE_CS,
> +  SEG_OVERRIDE_SS,
> +  SEG_OVERRIDE_DS,
> +  SEG_OVERRIDE_ES,
> +  SEG_OVERRIDE_FS,
> +  SEG_OVERRIDE_GS,
> +  SEG_OVERRIDE_max
> +} SegmentOverride;
> +
> +typedef uint8_t BOOL; /* flag type used by the consumed___ fields below */
> +
> +/*
> + * byteReader_t - Type for the byte reader that the consumer must provide to
> + *   the decoder.  Reads a single byte from the instruction's address space.
> + * @param arg     - A baton that the consumer can associate with any internal
> + *                  state that it needs.
> + * @param byte    - A pointer to a single byte in memory that should be set to
> + *                  contain the value at address.
> + * @param address - The address in the instruction's address space that should
> + *                  be read from.
> + * @return        - -1 if the byte cannot be read for any reason; 0 otherwise.
> + */
> +typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address);
> +
> +/*
> + * dlog_t - Type for the logging function that the consumer can provide to
> + *   get debugging output from the decoder.
> + * @param arg     - A baton that the consumer can associate with any internal
> + *                  state that it needs.
> + * @param log     - A string that contains the message.  Will be reused after
> + *                  the logger returns.
> + */
> +typedef void (*dlog_t)(void* arg, const char *log);
> +
> +/*
> + * The x86 internal instruction, which is produced by the decoder.
> + * decodeInstruction() zeroes the whole structure before filling it in, so
> + * any field the decoder does not set reads as zero.
> + */
> +struct InternalInstruction {
> +  /* Reader interface (C) */
> +  byteReader_t reader;
> +  /* Opaque value passed to the reader */
> +  void* readerArg;
> +  /* The address of the next byte to read via the reader */
> +  uint64_t readerCursor;
> +
> +  /* Logger interface (C) */
> +  dlog_t dlog;
> +  /* Opaque value passed to the logger */
> +  void* dlogArg;
> +
> +  /* General instruction information */
> +
> +  /* The mode to disassemble for (64-bit, protected, real) */
> +  DisassemblerMode mode;
> +  /* The start of the instruction, usable with the reader */
> +  uint64_t startLocation;
> +  /* The length of the instruction, in bytes */
> +  size_t length;
> +
> +  /* Prefix state */
> +
> +  /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
> +  uint8_t prefixPresent[0x100];
> +  /* contains the location (for use with the reader) of the prefix byte */
> +  uint64_t prefixLocations[0x100];
> +  /* The value of the REX prefix, if present */
> +  uint8_t rexPrefix;
> +  /* The location of the REX prefix */
> +  uint64_t rexLocation;
> +  /* The location where a mandatory prefix would have to be (i.e., right before
> +     the opcode, or right before the REX prefix if one is present) */
> +  uint64_t necessaryPrefixLocation;
> +  /* The segment override type */
> +  SegmentOverride segmentOverride;
> +
> +  /* Sizes of various critical pieces of data */
> +  uint8_t registerSize;     /* in bytes; readModRM() handles 2, 4 and 8 */
> +  uint8_t addressSize;      /* in bytes; readModRM() handles 2, 4 and 8 */
> +  uint8_t displacementSize; /* presumably in bytes as well - TODO confirm */
> +  uint8_t immediateSize;    /* in bytes; readImmediate() handles 1, 2, 4, 8 */
> +
> +  /* opcode state */
> +
> +  /* The value of the two-byte escape prefix (usually 0x0f) */
> +  uint8_t twoByteEscape;
> +  /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
> +  uint8_t threeByteEscape;
> +  /* The last byte of the opcode, not counting any ModR/M extension */
> +  uint8_t opcode;
> +  /* The ModR/M byte of the instruction, if it is an opcode extension */
> +  uint8_t modRMExtension;
> +
> +  /* decode state */
> +
> +  /* The type of opcode, used for indexing into the array of decode tables */
> +  OpcodeType opcodeType;
> +  /* The instruction ID, extracted from the decode table.  decodeInstruction()
> +     treats an ID of 0 as a decode failure */
> +  uint16_t instructionID;
> +  /* The specifier for the instruction, from the instruction info table */
> +  struct InstructionSpecifier* spec;
> +
> +  /* state for additional bytes, consumed during operand decode.  Pattern:
> +     consumed___ indicates that the byte was already consumed and does not
> +     need to be consumed again */
> +
> +  /* The ModR/M byte, which contains most register operands and some portion of
> +     all memory operands */
> +  BOOL                          consumedModRM;
> +  uint8_t                       modRM;
> +
> +  /* The SIB byte, used for more complex 32- or 64-bit memory operands */
> +  BOOL                          consumedSIB;
> +  uint8_t                       sib;
> +
> +  /* The displacement, used for memory operands */
> +  BOOL                          consumedDisplacement;
> +  int32_t                       displacement;
> +
> +  /* Immediates.  There can be two in some cases */
> +  uint8_t                       numImmediatesConsumed;
> +  uint8_t                       numImmediatesTranslated;
> +  uint64_t                      immediates[2];
> +
> +  /* A register or immediate operand encoded into the opcode */
> +  BOOL                          consumedOpcodeModifier;
> +  uint8_t                       opcodeModifier;
> +  Reg                           opcodeRegister;
> +
> +  /* Portions of the ModR/M byte */
> +
> +  /* These fields determine the allowable values for the ModR/M fields, which
> +     depend on operand and address widths */
> +  EABase                        eaBaseBase;
> +  EABase                        eaRegBase;
> +  Reg                           regBase;
> +
> +  /* The Mod and R/M fields can encode a base for an effective address, or a
> +     register.  These are separated into two fields here */
> +  EABase                        eaBase;
> +  EADisplacement                eaDisplacement;
> +  /* The reg field always encodes a register */
> +  Reg                           reg;
> +
> +  /* SIB state */
> +  SIBIndex                      sibIndex;
> +  uint8_t                       sibScale;
> +  SIBBase                       sibBase;
> +};
> +
> +/* decodeInstruction - Decode one instruction and store the decoding results in
> + *   a buffer provided by the consumer.  The buffer is zeroed before decoding
> + *   starts, so nothing from a previous decode survives in it.
> + * @param insn      - The buffer to store the instruction in.  Allocated by the
> + *                    consumer.
> + * @param reader    - The byteReader_t for the bytes to be read.
> + * @param readerArg - An argument to pass to the reader for storing context
> + *                    specific to the consumer.  May be NULL.
> + * @param logger    - The dlog_t to be used in printing status messages from the
> + *                    disassembler.  May be NULL.
> + * @param loggerArg - An argument to pass to the logger for storing context
> + *                    specific to the logger.  May be NULL.
> + * @param startLoc  - The address (in the reader's address space) of the first
> + *                    byte in the instruction.
> + * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in.
> + * @return          - Nonzero if there was an error during decode, 0 otherwise.
> + */
> +int decodeInstruction(struct InternalInstruction* insn,
> +                      byteReader_t reader,
> +                      void* readerArg,
> +                      dlog_t logger,
> +                      void* loggerArg,
> +                      uint64_t startLoc,
> +                      DisassemblerMode mode);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif
>
> Added: llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h?rev=91749&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h (added)
> +++ llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h Fri Dec 18 20:59:52 2009
> @@ -0,0 +1,354 @@
> +/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
> + *
> + *                     The LLVM Compiler Infrastructure
> + *
> + * This file is distributed under the University of Illinois Open Source
> + * License. See LICENSE.TXT for details.
> + *
> + *===----------------------------------------------------------------------===*
> + *
> + * This file is part of the X86 Disassembler.
> + * It contains common definitions used by both the disassembler and the table
> + *  generator.
> + * Documentation for the disassembler can be found in X86Disassembler.h.
> + *
> + *===----------------------------------------------------------------------===*/
> +
> +/*
> + * This header file provides those definitions that need to be shared between
> + * the decoder and the table generator in a C-friendly manner.
> + */
> +
> +#ifndef X86DISASSEMBLERDECODERCOMMON_H
> +#define X86DISASSEMBLERDECODERCOMMON_H
> +
> +#include "llvm/System/DataTypes.h"
> +
> +#define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers
> +#define CONTEXTS_SYM      x86DisassemblerContexts
> +#define ONEBYTE_SYM       x86DisassemblerOneByteOpcodes
> +#define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes
> +#define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes
> +#define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes
> +
> +#define INSTRUCTIONS_STR  "x86DisassemblerInstrSpecifiers"
> +#define CONTEXTS_STR      "x86DisassemblerContexts"
> +#define ONEBYTE_STR       "x86DisassemblerOneByteOpcodes"
> +#define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes"
> +#define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes"
> +#define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes"
> +
> +/*
> + * Attributes of an instruction that must be known before the opcode can be
> + * processed correctly.  Most of these indicate the presence of particular
> + * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
> + */
> +#define ATTRIBUTE_BITS          \
> +  ENUM_ENTRY(ATTR_NONE,   0x00) \
> +  ENUM_ENTRY(ATTR_64BIT,  0x01) \
> +  ENUM_ENTRY(ATTR_XS,     0x02) \
> +  ENUM_ENTRY(ATTR_XD,     0x04) \
> +  ENUM_ENTRY(ATTR_REXW,   0x08) \
> +  ENUM_ENTRY(ATTR_OPSIZE, 0x10)
> +
> +#define ENUM_ENTRY(n, v) n = v,
> +enum attributeBits {
> +  ATTRIBUTE_BITS
> +  ATTR_max
> +};
> +#undef ENUM_ENTRY
> +
> +/*
> + * Combinations of the above attributes that are relevant to instruction
> + * decode.  Although other combinations are possible, they can be reduced to
> + * these without affecting the ultimately decoded instruction.
> + */
> +
> +/*           Class name           Rank  Rationale for rank assignment         */
> +#define INSTRUCTION_CONTEXTS                                                   \
> +  ENUM_ENTRY(IC,                    0,  "says nothing about the instruction")  \
> +  ENUM_ENTRY(IC_64BIT,              1,  "says the instruction applies in "     \
> +                                        "64-bit mode but no more")             \
> +  ENUM_ENTRY(IC_OPSIZE,             3,  "requires an OPSIZE prefix, so "       \
> +                                        "operands change width")               \
> +  ENUM_ENTRY(IC_XD,                 2,  "may say something about the opcode "  \
> +                                        "but not the operands")                \
> +  ENUM_ENTRY(IC_XS,                 2,  "may say something about the opcode "  \
> +                                        "but not the operands")                \
> +  ENUM_ENTRY(IC_64BIT_REXW,         4,  "requires a REX.W prefix, so operands "\
> +                                        "change width; overrides IC_OPSIZE")   \
> +  ENUM_ENTRY(IC_64BIT_OPSIZE,       3,  "Just as meaningful as IC_OPSIZE")     \
> +  ENUM_ENTRY(IC_64BIT_XD,           5,  "XD instructions are SSE; REX.W is "   \
> +                                        "secondary")                           \
> +  ENUM_ENTRY(IC_64BIT_XS,           5,  "Just as meaningful as IC_64BIT_XD")   \
> +  ENUM_ENTRY(IC_64BIT_REXW_XS,      6,  "OPSIZE could mean a different "       \
> +                                        "opcode")                              \
> +  ENUM_ENTRY(IC_64BIT_REXW_XD,      6,  "Just as meaningful as "               \
> +                                        "IC_64BIT_REXW_XS")                    \
> +  ENUM_ENTRY(IC_64BIT_REXW_OPSIZE,  7,  "The Dynamic Duo!  Prefer over all "   \
> +                                        "else because this changes most "      \
> +                                        "operands' meaning")
> +
> +#define ENUM_ENTRY(n, r, d) n,
> +typedef enum {
> +  INSTRUCTION_CONTEXTS
> +  IC_max
> +} InstructionContext;
> +#undef ENUM_ENTRY
> +
> +/*
> + * Opcode types, which determine which decode table to use, both in the Intel
> + * manual and also for the decoder.
> + */
> +typedef enum {
> +  ONEBYTE       = 0,
> +  TWOBYTE       = 1,
> +  THREEBYTE_38  = 2,
> +  THREEBYTE_3A  = 3
> +} OpcodeType;
> +
> +/*
> + * The following structs are used for the hierarchical decode table.  After
> + * determining the instruction's class (i.e., which IC_* constant applies to
> + * it), the decoder reads the opcode.  Some instructions require specific
> + * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
> + *
> + * If a ModR/M byte is not required, "required" is left unset, and the values
> + * for each instructionID are identical.
> + */
> +
> +typedef uint16_t InstrUID;
> +
> +/*
> + * ModRMDecisionType - describes the type of ModR/M decision, allowing the
> + * consumer to determine the number of entries in it.
> + *
> + * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
> + *                  instruction is the same.
> + * MODRM_SPLITRM  - If the ModR/M byte is between 0x00 and 0xbf, the opcode
> + *                  corresponds to one instruction; otherwise, it corresponds to
> + *                  a different instruction.
> + * MODRM_FULL     - Potentially, each value of the ModR/M byte could correspond
> + *                  to a different instruction.
> + */
> +
> +#define MODRMTYPES            \
> +  ENUM_ENTRY(MODRM_ONEENTRY)  \
> +  ENUM_ENTRY(MODRM_SPLITRM)   \
> +  ENUM_ENTRY(MODRM_FULL)
> +
> +#define ENUM_ENTRY(n) n,
> +typedef enum {
> +  MODRMTYPES
> +  MODRM_max
> +} ModRMDecisionType;
> +#undef ENUM_ENTRY
> +
> +/*
> + * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
> + *  instruction each possible value of the ModR/M byte corresponds to.  Once
> + *  this information is known, we have narrowed down to a single instruction.
> + */
> +struct ModRMDecision {
> +  uint8_t     modrm_type;
> +
> +  /* The macro below must be defined wherever this file is included. */
> +  INSTRUCTION_IDS
> +};
> +
> +/*
> + * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
> + *   given a particular opcode.
> + */
> +struct OpcodeDecision {
> +  struct ModRMDecision modRMDecisions[256];
> +};
> +
> +/*
> + * ContextDecision - Specifies which opcode->instruction tables to look at given
> + *   a particular context (set of attributes).  Since there are many possible
> + *   contexts, the decoder first uses CONTEXTS_SYM to determine which context
> + *   applies given a specific set of attributes.  Hence there are only IC_max
> + *   entries in this table, rather than 2^(ATTR_max).
> + */
> +struct ContextDecision {
> +  struct OpcodeDecision opcodeDecisions[IC_max];
> +};
> +
> +/*
> + * Physical encodings of instruction operands.
> + */
> +
> +#define ENCODINGS                                                              \
> +  ENUM_ENTRY(ENCODING_NONE,   "")                                              \
> +  ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \
> +  ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \
> +  ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \
> +  ENUM_ENTRY(ENCODING_CW,     "2-byte")                                        \
> +  ENUM_ENTRY(ENCODING_CD,     "4-byte")                                        \
> +  ENUM_ENTRY(ENCODING_CP,     "6-byte")                                        \
> +  ENUM_ENTRY(ENCODING_CO,     "8-byte")                                        \
> +  ENUM_ENTRY(ENCODING_CT,     "10-byte")                                       \
> +  ENUM_ENTRY(ENCODING_IB,     "1-byte immediate")                              \
> +  ENUM_ENTRY(ENCODING_IW,     "2-byte")                                        \
> +  ENUM_ENTRY(ENCODING_ID,     "4-byte")                                        \
> +  ENUM_ENTRY(ENCODING_IO,     "8-byte")                                        \
> +  ENUM_ENTRY(ENCODING_RB,     "(AL..DIL, R8L..R15L) Register code added to "   \
> +                              "the opcode byte")                               \
> +  ENUM_ENTRY(ENCODING_RW,     "(AX..DI, R8W..R15W)")                           \
> +  ENUM_ENTRY(ENCODING_RD,     "(EAX..EDI, R8D..R15D)")                         \
> +  ENUM_ENTRY(ENCODING_RO,     "(RAX..RDI, R8..R15)")                           \
> +  ENUM_ENTRY(ENCODING_I,      "Position on floating-point stack added to the " \
> +                              "opcode byte")                                   \
> +                                                                               \
> +  ENUM_ENTRY(ENCODING_Iv,     "Immediate of operand size")                     \
> +  ENUM_ENTRY(ENCODING_Ia,     "Immediate of address size")                     \
> +  ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added to the "    \
> +                              "opcode byte")                                   \
> +  ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \
> +                              "in type")
> +
> +#define ENUM_ENTRY(n, d) n,
> +  typedef enum {
> +    ENCODINGS
> +    ENCODING_max
> +  } OperandEncoding;
> +#undef ENUM_ENTRY
> +
> +/*
> + * Semantic interpretations of instruction operands.
> + */
> +
> +#define TYPES                                                                  \
> +  ENUM_ENTRY(TYPE_NONE,       "")                                              \
> +  ENUM_ENTRY(TYPE_REL8,       "1-byte immediate address")                      \
> +  ENUM_ENTRY(TYPE_REL16,      "2-byte")                                        \
> +  ENUM_ENTRY(TYPE_REL32,      "4-byte")                                        \
> +  ENUM_ENTRY(TYPE_REL64,      "8-byte")                                        \
> +  ENUM_ENTRY(TYPE_PTR1616,    "2+2-byte segment+offset address")               \
> +  ENUM_ENTRY(TYPE_PTR1632,    "2+4-byte")                                      \
> +  ENUM_ENTRY(TYPE_PTR1664,    "2+8-byte")                                      \
> +  ENUM_ENTRY(TYPE_R8,         "1-byte register operand")                       \
> +  ENUM_ENTRY(TYPE_R16,        "2-byte")                                        \
> +  ENUM_ENTRY(TYPE_R32,        "4-byte")                                        \
> +  ENUM_ENTRY(TYPE_R64,        "8-byte")                                        \
> +  ENUM_ENTRY(TYPE_IMM8,       "1-byte immediate operand")                      \
> +  ENUM_ENTRY(TYPE_IMM16,      "2-byte")                                        \
> +  ENUM_ENTRY(TYPE_IMM32,      "4-byte")                                        \
> +  ENUM_ENTRY(TYPE_IMM64,      "8-byte")                                        \
> +  ENUM_ENTRY(TYPE_RM8,        "1-byte register or memory operand")             \
> +  ENUM_ENTRY(TYPE_RM16,       "2-byte")                                        \
> +  ENUM_ENTRY(TYPE_RM32,       "4-byte")                                        \
> +  ENUM_ENTRY(TYPE_RM64,       "8-byte")                                        \
> +  ENUM_ENTRY(TYPE_M,          "Memory operand")                                \
> +  ENUM_ENTRY(TYPE_M8,         "1-byte")                                        \
> +  ENUM_ENTRY(TYPE_M16,        "2-byte")                                        \
> +  ENUM_ENTRY(TYPE_M32,        "4-byte")                                        \
> +  ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \
> +  ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \
> +  ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \
> +  ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \
> +  ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \
> +  ENUM_ENTRY(TYPE_M16_32,     "2+4-byte two-part memory operand (LIDT, LGDT)") \
> +  ENUM_ENTRY(TYPE_M16_16,     "2+2-byte (BOUND)")                              \
> +  ENUM_ENTRY(TYPE_M32_32,     "4+4-byte (BOUND)")                              \
> +  ENUM_ENTRY(TYPE_M16_64,     "2+8-byte (LIDT, LGDT)")                         \
> +  ENUM_ENTRY(TYPE_MOFFS8,     "1-byte memory offset (relative to segment "     \
> +                              "base)")                                         \
> +  ENUM_ENTRY(TYPE_MOFFS16,    "2-byte")                                        \
> +  ENUM_ENTRY(TYPE_MOFFS32,    "4-byte")                                        \
> +  ENUM_ENTRY(TYPE_MOFFS64,    "8-byte")                                        \
> +  ENUM_ENTRY(TYPE_SREG,       "Byte with single bit set: 0 = ES, 1 = CS, "     \
> +                              "2 = SS, 3 = DS, 4 = FS, 5 = GS")                \
> +  ENUM_ENTRY(TYPE_M32FP,      "32-bit IEE754 memory floating-point operand")   \
> +  ENUM_ENTRY(TYPE_M64FP,      "64-bit")                                        \
> +  ENUM_ENTRY(TYPE_M80FP,      "80-bit extended")                               \
> +  ENUM_ENTRY(TYPE_M16INT,     "2-byte memory integer operand for use in "      \
> +                              "floating-point instructions")                   \
> +  ENUM_ENTRY(TYPE_M32INT,     "4-byte")                                        \
> +  ENUM_ENTRY(TYPE_M64INT,     "8-byte")                                        \
> +  ENUM_ENTRY(TYPE_ST,         "Position on the floating-point stack")          \
> +  ENUM_ENTRY(TYPE_MM,         "MMX register operand")                          \
> +  ENUM_ENTRY(TYPE_MM32,       "4-byte MMX register or memory operand")         \
> +  ENUM_ENTRY(TYPE_MM64,       "8-byte")                                        \
> +  ENUM_ENTRY(TYPE_XMM,        "XMM register operand")                          \
> +  ENUM_ENTRY(TYPE_XMM32,      "4-byte XMM register or memory operand")         \
> +  ENUM_ENTRY(TYPE_XMM64,      "8-byte")                                        \
> +  ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \
> +  ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \
> +  ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
> +  ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
> +  ENUM_ENTRY(TYPE_CR32,       "4-byte control register operand")               \
> +  ENUM_ENTRY(TYPE_CR64,       "8-byte")                                        \
> +                                                                               \
> +  ENUM_ENTRY(TYPE_Mv,         "Memory operand of operand size")                \
> +  ENUM_ENTRY(TYPE_Rv,         "Register operand of operand size")              \
> +  ENUM_ENTRY(TYPE_IMMv,       "Immediate operand of operand size")             \
> +  ENUM_ENTRY(TYPE_RELv,       "Immediate address of operand size")             \
> +  ENUM_ENTRY(TYPE_DUP0,       "Duplicate of operand 0")                        \
> +  ENUM_ENTRY(TYPE_DUP1,       "operand 1")                                     \
> +  ENUM_ENTRY(TYPE_DUP2,       "operand 2")                                     \
> +  ENUM_ENTRY(TYPE_DUP3,       "operand 3")                                     \
> +  ENUM_ENTRY(TYPE_DUP4,       "operand 4")                                     \
> +  ENUM_ENTRY(TYPE_M512,       "512-bit FPU/MMX/XMM/MXCSR state")
> +
> +#define ENUM_ENTRY(n, d) n,
> +typedef enum {
> +  TYPES
> +  TYPE_max
> +} OperandType;
> +#undef ENUM_ENTRY
> +
> +/*
> + * OperandSpecifier - The specification for how to extract and interpret one
> + *   operand.
> + */
> +struct OperandSpecifier {
> +  OperandEncoding  encoding;
> +  OperandType      type;
> +};
> +
> +/*
> + * Indicates where the opcode modifier (if any) is to be found.  Extended
> + * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
> + */
> +
> +#define MODIFIER_TYPES        \
> +  ENUM_ENTRY(MODIFIER_NONE)   \
> +  ENUM_ENTRY(MODIFIER_OPCODE) \
> +  ENUM_ENTRY(MODIFIER_MODRM)
> +
> +#define ENUM_ENTRY(n) n,
> +typedef enum {
> +  MODIFIER_TYPES
> +  MODIFIER_max
> +} ModifierType;
> +#undef ENUM_ENTRY
> +
> +#define X86_MAX_OPERANDS 5
> +
> +/*
> + * The specification for how to extract and interpret a full instruction and
> + * its operands.
> + */
> +struct InstructionSpecifier {
> +  ModifierType modifierType;
> +  uint8_t modifierBase;
> +  struct OperandSpecifier operands[X86_MAX_OPERANDS];
> +
> +  /* The macro below must be defined wherever this file is included. */
> +  INSTRUCTION_SPECIFIER_FIELDS
> +};
> +
> +/*
> + * Decoding mode for the Intel disassembler.  16-bit, 32-bit, and 64-bit mode
> + * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
> + * respectively.
> + */
> +typedef enum {
> +  MODE_16BIT,
> +  MODE_32BIT,
> +  MODE_64BIT
> +} DisassemblerMode;
> +
> +#endif
>
> Modified: llvm/trunk/lib/Target/X86/Makefile
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Makefile?rev=91749&r1=91748&r2=91749&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/Makefile (original)
> +++ llvm/trunk/lib/Target/X86/Makefile Fri Dec 18 20:59:52 2009
> @@ -15,8 +15,8 @@
>                 X86GenRegisterInfo.inc X86GenInstrNames.inc \
>                 X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \
>                 X86GenAsmWriter1.inc X86GenDAGISel.inc  \
> -                X86GenFastISel.inc \
> -                X86GenCallingConv.inc X86GenSubtarget.inc
> +                X86GenDisassemblerTables.inc X86GenFastISel.inc \
> +                X86GenCallingConv.inc X86GenSubtarget.inc \
>
>  DIRS = AsmPrinter AsmParser Disassembler TargetInfo
>
>
> Modified: llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetMachine.cpp?rev=91749&r1=91748&r2=91749&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86TargetMachine.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86TargetMachine.cpp Fri Dec 18 20:59:52 2009
> @@ -38,6 +38,8 @@
>   }
>  }
>
> +extern "C" void LLVMInitializeX86Disassembler();
> +
>  extern "C" void LLVMInitializeX86Target() {
>   // Register the target.
>   RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
> @@ -47,6 +49,8 @@
>   RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);
>   RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
>
> +  LLVMInitializeX86Disassembler();
> +
>   // Register the code emitter.
>   TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter);
>   TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter);
>
> Modified: llvm/trunk/utils/TableGen/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CMakeLists.txt?rev=91749&r1=91748&r2=91749&view=diff
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/CMakeLists.txt (original)
> +++ llvm/trunk/utils/TableGen/CMakeLists.txt Fri Dec 18 20:59:52 2009
> @@ -23,6 +23,8 @@
>   TGValueTypes.cpp
>   TableGen.cpp
>   TableGenBackend.cpp
> +  X86DisassemblerTables.cpp
> +  X86RecognizableInstr.cpp
>   )
>
>  target_link_libraries(tblgen LLVMSupport LLVMSystem)
>
> Modified: llvm/trunk/utils/TableGen/DisassemblerEmitter.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/DisassemblerEmitter.cpp?rev=91749&r1=91748&r2=91749&view=diff
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/DisassemblerEmitter.cpp (original)
> +++ llvm/trunk/utils/TableGen/DisassemblerEmitter.cpp Fri Dec 18 20:59:52 2009
> @@ -10,7 +10,86 @@
>  #include "DisassemblerEmitter.h"
>  #include "CodeGenTarget.h"
>  #include "Record.h"
> +#include "X86DisassemblerTables.h"
> +#include "X86RecognizableInstr.h"
>  using namespace llvm;
> +using namespace llvm::X86Disassembler;
> +
> +/// DisassemblerEmitter - Contains disassembler table emitters for various
> +/// architectures.
> +
> +/// X86 Disassembler Emitter
> +///
> +/// *** IF YOU'RE HERE TO RESOLVE A "Primary decode conflict", LOOK DOWN NEAR
> +///     THE END OF THIS COMMENT!
> +///
> +/// The X86 disassembler emitter is part of the X86 Disassembler, which is
> +/// documented in lib/Target/X86/Disassembler/X86Disassembler.h.
> +///
> +/// The emitter produces the tables that the disassembler uses to translate
> +/// instructions.  The emitter generates the following tables:
> +///
> +/// - One table (CONTEXTS_SYM) that contains a mapping of attribute masks to
> +///   instruction contexts.  Although for each attribute there are cases where
> +///   that attribute determines decoding, in the majority of cases decoding is
> +///   the same whether or not an attribute is present.  For example, a 64-bit
> +///   instruction with an OPSIZE prefix and an XS prefix decodes the same way in
> +///   all cases as a 64-bit instruction with only OPSIZE set.  (The XS prefix
> +///   may have effects on its execution, but does not change the instruction
> +///   returned.)  This allows considerable space savings in other tables.
> +/// - Four tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, and
> +///   THREEBYTE3A_SYM) contain the hierarchy that the decoder traverses while
> +///   decoding an instruction.  At the lowest level of this hierarchy are
> +///   instruction UIDs, 16-bit integers that can be used to uniquely identify
> +///   the instruction and correspond exactly to its position in the list of
> +///   CodeGenInstructions for the target.
> +/// - One table (INSTRUCTIONS_SYM) contains information about the operands of
> +///   each instruction and how to decode them.
> +///
> +/// During table generation, there may be conflicts between instructions that
> +/// occupy the same space in the decode tables.  These conflicts are resolved as
> +/// follows in setTableFields() (X86DisassemblerTables.cpp):
> +///
> +/// - If the current context is the native context for one of the instructions
> +///   (that is, the attributes specified for it in the LLVM tables specify
> +///   precisely the current context), then it has priority.
> +/// - If the current context isn't native for either of the instructions, then
> +///   the higher-priority context wins (that is, the one that is more specific).
> +///   That hierarchy is determined by outranks() (X86DisassemblerTables.cpp)
> +/// - If the current context is native for both instructions, then the table
> +///   emitter reports a conflict and dies.
> +///
> +/// *** RESOLUTION FOR "Primary decode conflict"S
> +///
> +/// If two instructions collide, typically the solution is (in order of
> +/// likelihood):
> +///
> +/// (1) to filter out one of the instructions by editing filter()
> +///     (X86RecognizableInstr.cpp).  This is the most common resolution, but
> +///     check the Intel manuals first to make sure that (2) and (3) are not the
> +///     problem.
> +/// (2) to fix the tables (X86.td and its subsidiaries) so the opcodes are
> +///     accurate.  Sometimes they are not.
> +/// (3) to fix the tables to reflect the actual context (for example, required
> +///     prefixes), and possibly to add a new context by editing
> +///     lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h.  This is
> +///     unlikely to be the cause.
> +///
> +/// DisassemblerEmitter.cpp contains the implementation for the emitter,
> +///   which simply pulls out instructions from the CodeGenTarget and pushes them
> +///   into X86DisassemblerTables.
> +/// X86DisassemblerTables.h contains the interface for the instruction tables,
> +///   which manage and emit the structures discussed above.
> +/// X86DisassemblerTables.cpp contains the implementation for the instruction
> +///   tables.
> +/// X86ModRMFilters.h contains filters that can be used to determine which
> +///   ModR/M values are valid for a particular instruction.  These are used to
> +///   populate ModRMDecisions.
> +/// X86RecognizableInstr.h contains the interface for a single instruction,
> +///   which knows how to translate itself from a CodeGenInstruction and provide
> +///   the information necessary for integration into the tables.
> +/// X86RecognizableInstr.cpp contains the implementation for a single
> +///   instruction.
>
>  void DisassemblerEmitter::run(raw_ostream &OS) {
>   CodeGenTarget Target;
> @@ -25,6 +104,26 @@
>      << " *===---------------------------------------------------------------"
>      << "-------===*/\n";
>
> +  // X86 uses a custom disassembler.
> +  if (Target.getName() == "X86") {
> +    DisassemblerTables Tables;
> +
> +    std::vector<const CodeGenInstruction*> numberedInstructions;
> +    Target.getInstructionsByEnumValue(numberedInstructions);
> +
> +    for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i)
> +      RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i);
> +
> +    // FIXME: As long as we are using exceptions, might as well drop this to the
> +    // actual conflict site.
> +    if (Tables.hasConflicts())
> +      throw TGError(Target.getTargetRecord()->getLoc(),
> +                    "Primary decode conflict");
> +
> +    Tables.emit(OS);
> +    return;
> +  }
> +
>   throw TGError(Target.getTargetRecord()->getLoc(),
>                 "Unable to generate disassembler for this target");
>  }
>
> Added: llvm/trunk/utils/TableGen/X86DisassemblerShared.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/X86DisassemblerShared.h?rev=91749&view=auto
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/X86DisassemblerShared.h (added)
> +++ llvm/trunk/utils/TableGen/X86DisassemblerShared.h Fri Dec 18 20:59:52 2009
> @@ -0,0 +1,37 @@
> +//===- X86DisassemblerShared.h - Emitter shared header ----------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef X86DISASSEMBLERSHARED_H
> +#define X86DISASSEMBLERSHARED_H
> +
> +#include <string>
> +
> +#define INSTRUCTION_SPECIFIER_FIELDS    \
> +  bool                    filtered;     \
> +  InstructionContext      insnContext;  \
> +  std::string             name;         \
> +                                        \
> +  InstructionSpecifier() {              \
> +    filtered = false;                   \
> +    insnContext = IC;                   \
> +    name = "";                          \
> +    modifierType = MODIFIER_NONE;       \
> +    modifierBase = 0;                   \
> +    bzero(operands, sizeof(operands));  \
> +  }
> +
> +#define INSTRUCTION_IDS           \
> +  InstrUID   instructionIDs[256];
> +
> +#include "../../lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h"
> +
> +#undef INSTRUCTION_SPECIFIER_FIELDS
> +#undef INSTRUCTION_IDS
> +
> +#endif
>
> Added: llvm/trunk/utils/TableGen/X86DisassemblerTables.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/X86DisassemblerTables.cpp?rev=91749&view=auto
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/X86DisassemblerTables.cpp (added)
> +++ llvm/trunk/utils/TableGen/X86DisassemblerTables.cpp Fri Dec 18 20:59:52 2009
> @@ -0,0 +1,603 @@
> +//===- X86DisassemblerTables.cpp - Disassembler tables ----------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file is part of the X86 Disassembler Emitter.
> +// It contains the implementation of the disassembler tables.
> +// Documentation for the disassembler emitter in general can be found in
> +//  DisassemblerEmitter.cpp.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "X86DisassemblerShared.h"
> +#include "X86DisassemblerTables.h"
> +
> +#include "TableGenBackend.h"
> +#include "llvm/Support/ErrorHandling.h"
> +#include "llvm/Support/Format.h"
> +
> +#include <string>
> +
> +using namespace llvm;
> +using namespace X86Disassembler;
> +
> +/// inheritsFrom - Indicates whether all instructions in one class also belong
> +///   to another class.
> +///
> +/// @param child  - The class that may be the subset
> +/// @param parent - The class that may be the superset
> +/// @return       - True if child is a subset of parent, false otherwise.
> +static inline bool inheritsFrom(InstructionContext child,
> +                                InstructionContext parent) {
> +  if (child == parent)
> +    return true;
> +
> +  switch (parent) {
> +  case IC:
> +    return true;
> +  case IC_64BIT:
> +    return(inheritsFrom(child, IC_64BIT_REXW)   ||
> +           inheritsFrom(child, IC_64BIT_OPSIZE) ||
> +           inheritsFrom(child, IC_64BIT_XD)     ||
> +           inheritsFrom(child, IC_64BIT_XS));
> +  case IC_OPSIZE:
> +    return(inheritsFrom(child, IC_64BIT_OPSIZE));
> +  case IC_XD:
> +    return(inheritsFrom(child, IC_64BIT_XD));
> +  case IC_XS:
> +    return(inheritsFrom(child, IC_64BIT_XS));
> +  case IC_64BIT_REXW:
> +    return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
> +           inheritsFrom(child, IC_64BIT_REXW_XD) ||
> +           inheritsFrom(child, IC_64BIT_REXW_OPSIZE));
> +  case IC_64BIT_OPSIZE:
> +    return(inheritsFrom(child, IC_64BIT_REXW_OPSIZE));
> +  case IC_64BIT_XD:
> +    return(inheritsFrom(child, IC_64BIT_REXW_XD));
> +  case IC_64BIT_XS:
> +    return(inheritsFrom(child, IC_64BIT_REXW_XS));
> +  case IC_64BIT_REXW_XD:
> +    return false;
> +  case IC_64BIT_REXW_XS:
> +    return false;
> +  case IC_64BIT_REXW_OPSIZE:
> +    return false;
> +  default:
> +    return false;
> +  }
> +}
> +
> +/// outranks - Indicates, when an instruction has two different applicable
> +///   classes, which class should be preferred when performing decode.  This
> +///   imposes a total ordering (ties are resolved toward "lower").
> +///
> +/// @param upper  - The class that may be preferable
> +/// @param lower  - The class that may be less preferable
> +/// @return       - True if upper is to be preferred, false otherwise.
> +static inline bool outranks(InstructionContext upper,
> +                            InstructionContext lower) {
> +  assert(upper < IC_max);
> +  assert(lower < IC_max);
> +
> +#define ENUM_ENTRY(n, r, d) r,
> +  static int ranks[IC_max] = {
> +    INSTRUCTION_CONTEXTS
> +  };
> +#undef ENUM_ENTRY
> +
> +  return (ranks[upper] > ranks[lower]);
> +}
> +
> +/// stringForContext - Returns a string containing the name of a particular
> +///   InstructionContext, usually for diagnostic purposes.
> +///
> +/// @param insnContext  - The instruction class to transform to a string.
> +/// @return           - A statically-allocated string constant that contains the
> +///                     name of the instruction class.
> +static inline const char* stringForContext(InstructionContext insnContext) {
> +  switch (insnContext) {
> +  default:
> +    llvm_unreachable("Unhandled instruction class");
> +#define ENUM_ENTRY(n, r, d)   case n: return #n; break;
> +  INSTRUCTION_CONTEXTS
> +#undef ENUM_ENTRY
> +  }
> +}
> +
> +/// stringForOperandType - Like stringForContext, but for OperandTypes.
> +static inline const char* stringForOperandType(OperandType type) {
> +  switch (type) {
> +  default:
> +    llvm_unreachable("Unhandled type");
> +#define ENUM_ENTRY(i, d) case i: return #i;
> +  TYPES
> +#undef ENUM_ENTRY
> +  }
> +}
> +
> +/// stringForOperandEncoding - like stringForContext, but for
> +///   OperandEncodings.
> +static inline const char* stringForOperandEncoding(OperandEncoding encoding) {
> +  switch (encoding) {
> +  default:
> +    llvm_unreachable("Unhandled encoding");
> +#define ENUM_ENTRY(i, d) case i: return #i;
> +  ENCODINGS
> +#undef ENUM_ENTRY
> +  }
> +}
> +
> +void DisassemblerTables::emitOneID(raw_ostream &o,
> +                                   uint32_t &i,
> +                                   InstrUID id,
> +                                   bool addComma) const {
> +  if (id)
> +    o.indent(i * 2) << format("0x%hx", id);
> +  else
> +    o.indent(i * 2) << 0;
> +
> +  if (addComma)
> +    o << ", ";
> +  else
> +    o << "  ";
> +
> +  o << "/* ";
> +  o << InstructionSpecifiers[id].name;
> +  o << "*/";
> +
> +  o << "\n";
> +}
> +
> +/// emitEmptyTable - Emits the modRMEmptyTable, which is used as an ID table by
> +///   all ModR/M decisions for instructions that are invalid for all possible
> +///   ModR/M byte values.
> +///
> +/// @param o        - The output stream on which to emit the table.
> +/// @param i        - The indentation level for that output stream.
> +static void emitEmptyTable(raw_ostream &o, uint32_t &i)
> +{
> +  o.indent(i * 2) << "InstrUID modRMEmptyTable[1] = { 0 };" << "\n";
> +  o << "\n";
> +}
> +
> +/// getDecisionType - Determines whether a ModRM decision with 256 entries can
> +///   be compacted by eliminating redundant information.
> +///
> +/// @param decision - The decision to be compacted.
> +/// @return         - The most compact available representation for the
> +///                   decision.
> +static ModRMDecisionType getDecisionType(ModRMDecision &decision)
> +{
> +  bool satisfiesOneEntry = true;
> +  bool satisfiesSplitRM = true;
> +
> +  uint16_t index;
> +
> +  for (index = 0; index < 256; ++index) {
> +    if (decision.instructionIDs[index] != decision.instructionIDs[0])
> +      satisfiesOneEntry = false;
> +
> +    if (((index & 0xc0) == 0xc0) &&
> +       (decision.instructionIDs[index] != decision.instructionIDs[0xc0]))
> +      satisfiesSplitRM = false;
> +
> +    if (((index & 0xc0) != 0xc0) &&
> +       (decision.instructionIDs[index] != decision.instructionIDs[0x00]))
> +      satisfiesSplitRM = false;
> +  }
> +
> +  if (satisfiesOneEntry)
> +    return MODRM_ONEENTRY;
> +
> +  if (satisfiesSplitRM)
> +    return MODRM_SPLITRM;
> +
> +  return MODRM_FULL;
> +}
> +
> +/// stringForDecisionType - Returns a statically-allocated string corresponding
> +///   to a particular decision type.
> +///
> +/// @param dt - The decision type.
> +/// @return   - A pointer to the statically-allocated string (e.g.,
> +///             "MODRM_ONEENTRY" for MODRM_ONEENTRY).
> +static const char* stringForDecisionType(ModRMDecisionType dt)
> +{
> +#define ENUM_ENTRY(n) case n: return #n;
> +  switch (dt) {
> +    default:
> +      llvm_unreachable("Unknown decision type");
> +    MODRMTYPES
> +  };
> +#undef ENUM_ENTRY
> +}
> +
> +/// stringForModifierType - Returns a statically-allocated string corresponding
> +///   to an opcode modifier type.
> +///
> +/// @param mt - The modifier type.
> +/// @return   - A pointer to the statically-allocated string (e.g.,
> +///             "MODIFIER_NONE" for MODIFIER_NONE).
> +static const char* stringForModifierType(ModifierType mt)
> +{
> +#define ENUM_ENTRY(n) case n: return #n;
> +  switch(mt) {
> +    default:
> +      llvm_unreachable("Unknown modifier type");
> +    MODIFIER_TYPES
> +  };
> +#undef ENUM_ENTRY
> +}
> +
> +DisassemblerTables::DisassemblerTables() {
> +  unsigned i;
> +
> +  for (i = 0; i < 4; i++) {
> +    Tables[i] = new ContextDecision;
> +    bzero(Tables[i], sizeof(ContextDecision));
> +  }
> +
> +  HasConflicts = false;
> +}
> +
> +DisassemblerTables::~DisassemblerTables() {
> +  unsigned i;
> +
> +  for (i = 0; i < 4; i++)
> +    delete Tables[i];
> +}
> +
> +void DisassemblerTables::emitModRMDecision(raw_ostream &o1,
> +                                           raw_ostream &o2,
> +                                           uint32_t &i1,
> +                                           uint32_t &i2,
> +                                           ModRMDecision &decision)
> +  const {
> +  static uint64_t sTableNumber = 0;
> +  uint64_t thisTableNumber = sTableNumber;
> +  ModRMDecisionType dt = getDecisionType(decision);
> +  uint16_t index;
> +
> +  if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0)
> +  {
> +    o2.indent(i2) << "{ /* ModRMDecision */" << "\n";
> +    i2++;
> +
> +    o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
> +    o2.indent(i2) << "modRMEmptyTable";
> +
> +    i2--;
> +    o2.indent(i2) << "}";
> +    return;
> +  }
> +
> +  o1.indent(i1) << "InstrUID modRMTable" << thisTableNumber;
> +
> +  switch (dt) {
> +    default:
> +      llvm_unreachable("Unknown decision type");
> +    case MODRM_ONEENTRY:
> +      o1 << "[1]";
> +      break;
> +    case MODRM_SPLITRM:
> +      o1 << "[2]";
> +      break;
> +    case MODRM_FULL:
> +      o1 << "[256]";
> +      break;
> +  }
> +
> +  o1 << " = {" << "\n";
> +  i1++;
> +
> +  switch (dt) {
> +    default:
> +      llvm_unreachable("Unknown decision type");
> +    case MODRM_ONEENTRY:
> +      emitOneID(o1, i1, decision.instructionIDs[0], false);
> +      break;
> +    case MODRM_SPLITRM:
> +      emitOneID(o1, i1, decision.instructionIDs[0x00], true); // mod = 0b00
> +      emitOneID(o1, i1, decision.instructionIDs[0xc0], false); // mod = 0b11
> +      break;
> +    case MODRM_FULL:
> +      for (index = 0; index < 256; ++index)
> +        emitOneID(o1, i1, decision.instructionIDs[index], index < 255);
> +      break;
> +  }
> +
> +  i1--;
> +  o1.indent(i1) << "};" << "\n";
> +  o1 << "\n";
> +
> +  o2.indent(i2) << "{ /* struct ModRMDecision */" << "\n";
> +  i2++;
> +
> +  o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
> +  o2.indent(i2) << "modRMTable" << sTableNumber << "\n";
> +
> +  i2--;
> +  o2.indent(i2) << "}";
> +
> +  ++sTableNumber;
> +}
> +
> +void DisassemblerTables::emitOpcodeDecision(
> +  raw_ostream &o1,
> +  raw_ostream &o2,
> +  uint32_t &i1,
> +  uint32_t &i2,
> +  OpcodeDecision &decision) const {
> +  uint16_t index;
> +
> +  o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n";
> +  i2++;
> +  o2.indent(i2) << "{" << "\n";
> +  i2++;
> +
> +  for (index = 0; index < 256; ++index) {
> +    o2.indent(i2);
> +
> +    o2 << "/* 0x" << format("%02hhx", index) << " */" << "\n";
> +
> +    emitModRMDecision(o1, o2, i1, i2, decision.modRMDecisions[index]);
> +
> +    if (index <  255)
> +      o2 << ",";
> +
> +    o2 << "\n";
> +  }
> +
> +  i2--;
> +  o2.indent(i2) << "}" << "\n";
> +  i2--;
> +  o2.indent(i2) << "}" << "\n";
> +}
> +
> +void DisassemblerTables::emitContextDecision(
> +  raw_ostream &o1,
> +  raw_ostream &o2,
> +  uint32_t &i1,
> +  uint32_t &i2,
> +  ContextDecision &decision,
> +  const char* name) const {
> +  o2.indent(i2) << "struct ContextDecision " << name << " = {" << "\n";
> +  i2++;
> +  o2.indent(i2) << "{ /* opcodeDecisions */" << "\n";
> +  i2++;
> +
> +  unsigned index;
> +
> +  for (index = 0; index < IC_max; ++index) {
> +    o2.indent(i2) << "/* ";
> +    o2 << stringForContext((InstructionContext)index);
> +    o2 << " */";
> +    o2 << "\n";
> +
> +    emitOpcodeDecision(o1, o2, i1, i2, decision.opcodeDecisions[index]);
> +
> +    if (index + 1 < IC_max)
> +      o2 << ", ";
> +  }
> +
> +  i2--;
> +  o2.indent(i2) << "}" << "\n";
> +  i2--;
> +  o2.indent(i2) << "};" << "\n";
> +}
> +
> +void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i)
> +  const {
> +  o.indent(i * 2) << "struct InstructionSpecifier ";
> +  o << INSTRUCTIONS_STR << "[";
> +  o << InstructionSpecifiers.size();
> +  o << "] = {" << "\n";
> +
> +  i++;
> +
> +  uint16_t numInstructions = InstructionSpecifiers.size();
> +  uint16_t index, operandIndex;
> +
> +  for (index = 0; index < numInstructions; ++index) {
> +    o.indent(i * 2) << "{ /* " << index << " */" << "\n";
> +    i++;
> +
> +    o.indent(i * 2) <<
> +      stringForModifierType(InstructionSpecifiers[index].modifierType);
> +    o << "," << "\n";
> +
> +    o.indent(i * 2) << "0x";
> +    o << format("%02hhx", (uint16_t)InstructionSpecifiers[index].modifierBase);
> +    o << "," << "\n";
> +
> +    o.indent(i * 2) << "{" << "\n";
> +    i++;
> +
> +    for (operandIndex = 0; operandIndex < X86_MAX_OPERANDS; ++operandIndex) {
> +      o.indent(i * 2) << "{ ";
> +      o << stringForOperandEncoding(InstructionSpecifiers[index]
> +                                    .operands[operandIndex]
> +                                    .encoding);
> +      o << ", ";
> +      o << stringForOperandType(InstructionSpecifiers[index]
> +                                .operands[operandIndex]
> +                                .type);
> +      o << " }";
> +
> +      if (operandIndex < X86_MAX_OPERANDS - 1)
> +        o << ",";
> +
> +      o << "\n";
> +    }
> +
> +    i--;
> +    o.indent(i * 2) << "}," << "\n";
> +
> +    o.indent(i * 2) << "\"" << InstructionSpecifiers[index].name << "\"";
> +    o << "\n";
> +
> +    i--;
> +    o.indent(i * 2) << "}";
> +
> +    if (index + 1 < numInstructions)
> +      o << ",";
> +
> +    o << "\n";
> +  }
> +
> +  i--;
> +  o.indent(i * 2) << "};" << "\n";
> +}
> +
> +void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
> +  uint16_t index;
> +
> +  o.indent(i * 2) << "InstructionContext ";
> +  o << CONTEXTS_STR << "[256] = {" << "\n";
> +  i++;
> +
> +  for (index = 0; index < 256; ++index) {
> +    o.indent(i * 2);
> +
> +    if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
> +      o << "IC_64BIT_REXW_XS";
> +    else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
> +      o << "IC_64BIT_REXW_XD";
> +    else if ((index & ATTR_64BIT) && (index & ATTR_REXW) &&
> +             (index & ATTR_OPSIZE))
> +      o << "IC_64BIT_REXW_OPSIZE";
> +    else if ((index & ATTR_64BIT) && (index & ATTR_XS))
> +      o << "IC_64BIT_XS";
> +    else if ((index & ATTR_64BIT) && (index & ATTR_XD))
> +      o << "IC_64BIT_XD";
> +    else if ((index & ATTR_64BIT) && (index & ATTR_OPSIZE))
> +      o << "IC_64BIT_OPSIZE";
> +    else if ((index & ATTR_64BIT) && (index & ATTR_REXW))
> +      o << "IC_64BIT_REXW";
> +    else if ((index & ATTR_64BIT))
> +      o << "IC_64BIT";
> +    else if (index & ATTR_XS)
> +      o << "IC_XS";
> +    else if (index & ATTR_XD)
> +      o << "IC_XD";
> +    else if (index & ATTR_OPSIZE)
> +      o << "IC_OPSIZE";
> +    else
> +      o << "IC";
> +
> +    if (index < 255)
> +      o << ",";
> +    else
> +      o << " ";
> +
> +    o << " /* " << index << " */";
> +
> +    o << "\n";
> +  }
> +
> +  i--;
> +  o.indent(i * 2) << "};" << "\n";
> +}
> +
> +void DisassemblerTables::emitContextDecisions(raw_ostream &o1,
> +                                            raw_ostream &o2,
> +                                            uint32_t &i1,
> +                                            uint32_t &i2)
> +  const {
> +  emitContextDecision(o1, o2, i1, i2, *Tables[0], ONEBYTE_STR);
> +  emitContextDecision(o1, o2, i1, i2, *Tables[1], TWOBYTE_STR);
> +  emitContextDecision(o1, o2, i1, i2, *Tables[2], THREEBYTE38_STR);
> +  emitContextDecision(o1, o2, i1, i2, *Tables[3], THREEBYTE3A_STR);
> +}
> +
> +void DisassemblerTables::emit(raw_ostream &o) const {
> +  uint32_t i1 = 0;
> +  uint32_t i2 = 0;
> +
> +  std::string s1;
> +  std::string s2;
> +
> +  raw_string_ostream o1(s1);
> +  raw_string_ostream o2(s2);
> +
> +  emitInstructionInfo(o, i2);
> +  o << "\n";
> +
> +  emitContextTable(o, i2);
> +  o << "\n";
> +
> +  emitEmptyTable(o1, i1);
> +  emitContextDecisions(o1, o2, i1, i2);
> +
> +  o << o1.str();
> +  o << "\n";
> +  o << o2.str();
> +  o << "\n";
> +  o << "\n";
> +}
> +
> +void DisassemblerTables::setTableFields(ModRMDecision     &decision,
> +                                        const ModRMFilter &filter,
> +                                        InstrUID          uid,
> +                                        uint8_t           opcode) {
> +  unsigned index;
> +
> +  for (index = 0; index < 256; ++index) {
> +    if (filter.accepts(index)) {
> +      if (decision.instructionIDs[index] == uid)
> +        continue;
> +
> +      if (decision.instructionIDs[index] != 0) {
> +        InstructionSpecifier &newInfo =
> +          InstructionSpecifiers[uid];
> +        InstructionSpecifier &previousInfo =
> +          InstructionSpecifiers[decision.instructionIDs[index]];
> +
> +        if(newInfo.filtered)
> +          continue; // filtered instructions get lowest priority
> +
> +        if(previousInfo.name == "NOOP")
> +          continue; // special case for XCHG32ar and NOOP
> +
> +        if (outranks(previousInfo.insnContext, newInfo.insnContext))
> +          continue;
> +
> +        if (previousInfo.insnContext == newInfo.insnContext &&
> +            !previousInfo.filtered) {
> +          errs() << "Error: Primary decode conflict: ";
> +          errs() << newInfo.name << " would overwrite " << previousInfo.name;
> +          errs() << "\n";
> +          errs() << "ModRM   " << index << "\n";
> +          errs() << "Opcode  " << (uint16_t)opcode << "\n";
> +          errs() << "Context " << stringForContext(newInfo.insnContext) << "\n";
> +          HasConflicts = true;
> +        }
> +      }
> +
> +      decision.instructionIDs[index] = uid;
> +    }
> +  }
> +}
> +
> +void DisassemblerTables::setTableFields(OpcodeType          type,
> +                                        InstructionContext  insnContext,
> +                                        uint8_t             opcode,
> +                                        const ModRMFilter   &filter,
> +                                        InstrUID            uid) {
> +  unsigned index;
> +
> +  ContextDecision &decision = *Tables[type];
> +
> +  for (index = 0; index < IC_max; ++index) {
> +    if (inheritsFrom((InstructionContext)index,
> +                     InstructionSpecifiers[uid].insnContext))
> +      setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode],
> +                     filter,
> +                     uid,
> +                     opcode);
> +  }
> +}
>
> Added: llvm/trunk/utils/TableGen/X86DisassemblerTables.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/X86DisassemblerTables.h?rev=91749&view=auto
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/X86DisassemblerTables.h (added)
> +++ llvm/trunk/utils/TableGen/X86DisassemblerTables.h Fri Dec 18 20:59:52 2009
> @@ -0,0 +1,291 @@
> +//===- X86DisassemblerTables.h - Disassembler tables ------------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file is part of the X86 Disassembler Emitter.
> +// It contains the interface of the disassembler tables.
> +// Documentation for the disassembler emitter in general can be found in
> +//  X86DisassemblerEmitter.h.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef X86DISASSEMBLERTABLES_H
> +#define X86DISASSEMBLERTABLES_H
> +
> +#include "X86DisassemblerShared.h"
> +#include "X86ModRMFilters.h"
> +
> +#include "llvm/Support/raw_ostream.h"
> +
> +#include <vector>
> +
> +namespace llvm {
> +
> +namespace X86Disassembler {
> +
> +/// DisassemblerTables - Encapsulates all the decode tables being generated by
> +///   the table emitter.  Contains functions to populate the tables as well as
> +///   to emit them as hierarchical C structures suitable for consumption by the
> +///   runtime.
> +class DisassemblerTables {
> +private:
> +  /// The decoder tables.  There is one for each opcode type:
> +  /// [0] one-byte opcodes
> +  /// [1] two-byte opcodes of the form 0f __
> +  /// [2] three-byte opcodes of the form 0f 38 __
> +  /// [3] three-byte opcodes of the form 0f 3a __
> +  ContextDecision* Tables[4];
> +
> +  /// The instruction information table
> +  std::vector<InstructionSpecifier> InstructionSpecifiers;
> +
> +  /// True if there are primary decode conflicts in the instruction set
> +  bool HasConflicts;
> +
> +  /// emitOneID - Emits a table entry for a single instruction entry, at the
> +  ///   innermost level of the structure hierarchy.  The entry is printed out
> +  ///   in the format "0xnnnn, /* MNEMONIC */" where 0xnnnn is the ID in
> +  ///   hexadecimal (an ID of zero is printed as a plain 0), the comma is
> +  ///   printed if addComma is true, and the mnemonic is the name of the
> +  ///   instruction as listed in the LLVM tables.
> +  ///
> +  /// @param o        - The output stream to print the entry on.
> +  /// @param i        - The indentation level for o.
> +  /// @param id       - The unique ID of the instruction to print.
> +  /// @param addComma - Whether or not to print a comma after the ID.  True if
> +  ///                    additional items will follow.
> +  void emitOneID(raw_ostream &o,
> +                 uint32_t &i,
> +                 InstrUID id,
> +                 bool addComma) const;
> +
> +  /// emitModRMDecision - Emits a table of entries corresponding to a single
> +  ///   ModR/M decision.  Compacts the ModR/M decision if possible.  ModR/M
> +  ///   decisions are printed as:
> +  ///
> +  ///   { /* struct ModRMDecision */
> +  ///     TYPE,
> +  ///     modRMTablennnn
> +  ///   }
> +  ///
> +  ///   where nnnn is a unique ID for the corresponding table of IDs.
> +  ///   TYPE indicates whether the table has one entry that is the same
> +  ///   regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one
> +  ///   for bytes 0xc0-0xff -, or 256 entries, one for each possible byte.
> +  ///   nnnn is the number of a table for looking up these values.  The tables
> +  ///   are written separately so that tables consisting entirely of zeros will
> +  ///   not be duplicated.  (These all have the name modRMEmptyTable.)  A table
> +  ///   is printed as:
> +  ///
> +  ///   InstrUID modRMTablennnn[k] = {
> +  ///     nnnn, /* MNEMONIC */
> +  ///     ...
> +  ///     nnnn /* MNEMONIC */
> +  ///   };
> +  ///
> +  /// @param o1       - The output stream to print the ID table to.
> +  /// @param o2       - The output stream to print the decision structure to.
> +  /// @param i1       - The indentation level to use with stream o1.
> +  /// @param i2       - The indentation level to use with stream o2.
> +  /// @param decision - The ModR/M decision to emit.  This decision has 256
> +  ///                   entries - emitModRMDecision decides how to compact it.
> +  void emitModRMDecision(raw_ostream &o1,
> +                         raw_ostream &o2,
> +                         uint32_t &i1,
> +                         uint32_t &i2,
> +                         ModRMDecision &decision) const;
> +
> +  /// emitOpcodeDecision - Emits an OpcodeDecision and all its subsidiary ModR/M
> +  ///   decisions.  An OpcodeDecision is printed as:
> +  ///
> +  ///   { /* struct OpcodeDecision */
> +  ///     /* 0x00 */
> +  ///     { /* struct ModRMDecision */
> +  ///       ...
> +  ///     }
> +  ///     ...
> +  ///   }
> +  ///
> +  ///   where the ModRMDecision structure is printed as described in the
> +  ///   documentation for emitModRMDecision().  emitOpcodeDecision() passes on a
> +  ///   stream and indent level for the UID tables generated by
> +  ///   emitModRMDecision(), but does not use them itself.
> +  ///
> +  /// @param o1       - The output stream to print the ID tables generated by
> +  ///                   emitModRMDecision() to.
> +  /// @param o2       - The output stream for the decision structure itself.
> +  /// @param i1       - The indent level to use with stream o1.
> +  /// @param i2       - The indent level to use with stream o2.
> +  /// @param decision - The OpcodeDecision to emit along with its subsidiary
> +  ///                    structures.
> +  void emitOpcodeDecision(raw_ostream &o1,
> +                          raw_ostream &o2,
> +                          uint32_t &i1,
> +                          uint32_t &i2,
> +                          OpcodeDecision &decision) const;
> +
> +  /// emitContextDecision - Emits a ContextDecision and all its subsidiary
> +  ///   Opcode and ModRMDecisions.  A ContextDecision is printed as:
> +  ///
> +  ///   struct ContextDecision NAME = {
> +  ///     { /* OpcodeDecisions */
> +  ///       /* IC */
> +  ///       { /* struct OpcodeDecision */
> +  ///         ...
> +  ///       },
> +  ///       ...
> +  ///     }
> +  ///   }
> +  ///
> +  ///   NAME is the name of the ContextDecision (typically one of the four names
> +  ///   ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, and THREEBYTE3A_SYM from
> +  ///   X86DisassemblerDecoderCommon.h).
> +  ///   IC is one of the contexts in InstructionContext.  There is an opcode
> +  ///   decision for each possible context.
> +  ///   The OpcodeDecision structures are printed as described in the
> +  ///   documentation for emitOpcodeDecision.
> +  ///
> +  /// @param o1       - The output stream to print the ID tables generated by
> +  ///                   emitModRMDecision() to.
> +  /// @param o2       - The output stream to print the decision structure to.
> +  /// @param i1       - The indent level to use with stream o1.
> +  /// @param i2       - The indent level to use with stream o2.
> +  /// @param decision - The ContextDecision to emit along with its subsidiary
> +  ///                   structures.
> +  /// @param name     - The name for the ContextDecision.
> +  void emitContextDecision(raw_ostream &o1,
> +                           raw_ostream &o2,
> +                           uint32_t &i1,
> +                           uint32_t &i2,
> +                           ContextDecision &decision,
> +                           const char* name) const;
> +
> +  /// emitInstructionInfo - Prints the instruction specifier table, which has
> +  ///   one entry for each instruction, and contains name and operand
> +  ///   information.  This table is printed as:
> +  ///
> +  ///   struct InstructionSpecifier INSTRUCTIONS_STR[k] = {
> +  ///     { /* nnnn */
> +  ///       MODIFIER_TYPE,
> +  ///       0xnn,
> +  ///       {
> +  ///         { ENCODING, TYPE },
> +  ///         ...
> +  ///       },
> +  ///       "MNEMONIC"
> +  ///     },
> +  ///     ...
> +  ///   };
> +  ///
> +  ///   k is the total number of instructions.
> +  ///   nnnn is the ID of the current instruction (0-based).  This table
> +  ///   includes entries for non-instructions like PHINODE.
> +  ///   MODIFIER_TYPE is the instruction's opcode modifier type (e.g.,
> +  ///   MODIFIER_NONE).
> +  ///   0xnn is the lowest possible opcode for the current instruction, used for
> +  ///   AddRegFrm instructions to compute the operand's value.
> +  ///   ENCODING and TYPE describe the encoding and type for a single operand.
> +  ///
> +  /// @param o  - The output stream to which the instruction table should be
> +  ///             written.
> +  /// @param i  - The indent level for use with the stream.
> +  void emitInstructionInfo(raw_ostream &o, uint32_t &i) const;
> +
> +  /// emitContextTable - Prints the table that is used to translate from an
> +  ///   instruction attribute mask to an instruction context.  This table is
> +  ///   printed as:
> +  ///
> +  ///   InstructionContext CONTEXTS_STR[256] = {
> +  ///     IC, /* 0x00 */
> +  ///     ...
> +  ///   };
> +  ///
> +  ///   IC is the context corresponding to the mask 0x00, and there are 256
> +  ///   possible masks.
> +  ///
> +  /// @param o  - The output stream to which the context table should be written.
> +  /// @param i  - The indent level for use with the stream.
> +  void emitContextTable(raw_ostream &o, uint32_t &i) const;
> +
> +  /// emitContextDecisions - Prints all four ContextDecision structures using
> +  ///   emitContextDecision().
> +  ///
> +  /// @param o1 - The output stream to print the ID tables generated by
> +  ///             emitModRMDecision() to.
> +  /// @param o2 - The output stream to print the decision structures to.
> +  /// @param i1 - The indent level to use with stream o1.
> +  /// @param i2 - The indent level to use with stream o2.
> +  void emitContextDecisions(raw_ostream &o1,
> +                            raw_ostream &o2,
> +                            uint32_t &i1,
> +                            uint32_t &i2) const;
> +
> +  /// setTableFields - Uses a ModRMFilter to set the appropriate entries in a
> +  ///   ModRMDecision to refer to a particular instruction ID.
> +  ///
> +  /// @param decision - The ModRMDecision to populate.
> +  /// @param filter   - The filter to use in deciding which entries to populate.
> +  /// @param uid      - The unique ID to set matching entries to.
> +  /// @param opcode   - The opcode of the instruction, for error reporting.
> +  void setTableFields(ModRMDecision &decision,
> +                      const ModRMFilter &filter,
> +                      InstrUID uid,
> +                      uint8_t opcode);
> +public:
> +  /// Constructor - Allocates space for the class decisions and clears them.
> +  DisassemblerTables();
> +
> +  ~DisassemblerTables();
> +
> +  /// emit - Emits the instruction table, context table, and class decisions.
> +  ///
> +  /// @param o  - The output stream to print the tables to.
> +  void emit(raw_ostream &o) const;
> +
> +  /// setTableFields - Uses the opcode type, instruction context, opcode, and a
> +  ///   ModRMFilter as criteria to set a particular set of entries in the
> +  ///   decode tables to point to a specific uid.
> +  ///
> +  /// @param type         - The opcode type (ONEBYTE, TWOBYTE, etc.)
> +  /// @param insnContext  - The context to use (IC, IC_64BIT, etc.)
> +  /// @param opcode       - The last byte of the opcode (not counting any escape
> +  ///                       or extended opcodes).
> +  /// @param filter       - The ModRMFilter that decides which ModR/M byte values
> +  ///                       correspond to the desired instruction.
> +  /// @param uid          - The unique ID of the instruction.
> +  void setTableFields(OpcodeType type,
> +                      InstructionContext insnContext,
> +                      uint8_t opcode,
> +                      const ModRMFilter &filter,
> +                      InstrUID uid);
> +
> +  /// specForUID - Returns the instruction specifier for a given unique
> +  ///   instruction ID.  Used when resolving collisions.
> +  ///
> +  /// @param uid  - The unique ID of the instruction.
> +  /// @return     - A reference to the instruction specifier.
> +  InstructionSpecifier& specForUID(InstrUID uid) {
> +    if (uid >= InstructionSpecifiers.size())
> +      InstructionSpecifiers.resize(uid + 1);
> +
> +    return InstructionSpecifiers[uid];
> +  }
> +
> +  // hasConflicts - Reports whether there were primary decode conflicts
> +  //   from any instructions added to the tables.
> +  // @return  - true if there were; false otherwise.
> +
> +  bool hasConflicts() {
> +    return HasConflicts;
> +  }
> +};
> +
> +} // namespace X86Disassembler
> +
> +} // namespace llvm
> +
> +#endif
>
> Added: llvm/trunk/utils/TableGen/X86ModRMFilters.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/X86ModRMFilters.h?rev=91749&view=auto
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/X86ModRMFilters.h (added)
> +++ llvm/trunk/utils/TableGen/X86ModRMFilters.h Fri Dec 18 20:59:52 2009
> @@ -0,0 +1,197 @@
> +//===- X86ModRMFilters.h - Disassembler ModR/M filters ----------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file is part of the X86 Disassembler Emitter.
> +// It contains ModR/M filters that determine which values of the ModR/M byte
> +//  are valid for a particular instruction.
> +// Documentation for the disassembler emitter in general can be found in
> +//  X86DisassemblerEmitter.h.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef X86MODRMFILTERS_H
> +#define X86MODRMFILTERS_H
> +
> +#include "llvm/System/DataTypes.h"
> +
> +namespace llvm {
> +
> +namespace X86Disassembler {
> +
> +/// ModRMFilter - Abstract base class for classes that recognize patterns in
> +///   ModR/M bytes.
> +class ModRMFilter {
> +public:
> +  /// Destructor    - Override as necessary.
> +  virtual ~ModRMFilter() { }
> +
> +  /// isDumb        - Indicates whether this filter returns the same value for
> +  ///                 any value of the ModR/M byte.
> +  ///
> +  /// @result       - True if the filter returns the same value for any ModR/M
> +  ///                 byte; false if not.
> +  virtual bool isDumb() const { return false; }
> +
> +  /// accepts       - Indicates whether the filter accepts a particular ModR/M
> +  ///                 byte value.
> +  ///
> +  /// @result       - True if the filter accepts the ModR/M byte; false if not.
> +  virtual bool accepts(uint8_t modRM) const = 0;
> +};
> +
> +/// DumbFilter - Accepts any ModR/M byte.  Used for instructions that do not
> +///   require a ModR/M byte or instructions where the entire ModR/M byte is used
> +///   for operands.
> +class DumbFilter : public ModRMFilter {
> +public:
> +  bool isDumb() const {
> +    return true;
> +  }
> +
> +  bool accepts(uint8_t modRM) const {
> +    return true;
> +  }
> +};
> +
> +/// ModFilter - Classifies a ModR/M byte by its mod bits [bits 7-6] alone:
> +///   either they are 11, or they are anything else.
> +class ModFilter : public ModRMFilter {
> +private:
> +  bool R;
> +public:
> +  /// Constructor
> +  ///
> +  /// @r            - True to accept only bytes whose mod bits are 11; false
> +  ///                 to accept only bytes whose mod bits are not 11.  The
> +  ///                 name r reflects that the mod bits decide whether the R/M
> +  ///                 bits [bits 2-0] name a register or a memory operand.
> +  ModFilter(bool r) :
> +    ModRMFilter(),
> +    R(r) {
> +  }
> +
> +  /// accepts       - True when "mod bits are 11" agrees with R.
> +  bool accepts(uint8_t modRM) const {
> +    const bool modIs11 = (modRM & 0xc0) == 0xc0;
> +    return R == modIs11;
> +  }
> +};
> +
> +/// EscapeFilter - Filter for escape opcodes, which are classified two ways.
> +///   ModR/M bytes from 0xc0 through 0xff each get their own slot; below 0xc0
> +///   there is one instruction per value of the nnn field [bits 5-3], known
> +///   elsewhere as the reg field.
> +class EscapeFilter : public ModRMFilter {
> +private:
> +  bool C0_FF;
> +  uint8_t NNN_or_ModRM;
> +public:
> +  /// Constructor
> +  ///
> +  /// @c0_ff        - True if the ModR/M byte must lie in 0xc0..0xff; false if
> +  ///                 it must lie below 0xc0.
> +  /// @nnn_or_modRM - With c0_ff true, the exact ModR/M byte required.  With
> +  ///                 c0_ff false, the required value of the nnn field.
> +  EscapeFilter(bool c0_ff, uint8_t nnn_or_modRM) :
> +    ModRMFilter(),
> +    C0_FF(c0_ff),
> +    NNN_or_ModRM(nnn_or_modRM) {
> +  }
> +
> +  /// accepts       - Applies whichever of the two classifications was
> +  ///                 selected at construction time.
> +  bool accepts(uint8_t modRM) const {
> +    if (C0_FF)
> +      return modRM >= 0xc0 && modRM == NNN_or_ModRM;
> +
> +    const uint8_t nnn = (modRM & 0x38) >> 3;
> +    return modRM < 0xc0 && nnn == NNN_or_ModRM;
> +  }
> +};
> +
> +/// AddRegEscapeFilter - For escape opcodes where a register operand is added
> +///   to the ModR/M byte, so that eight consecutive ModR/M values all map to
> +///   one instruction.  Such instructions require the ModR/M byte to fall
> +///   between 0xc0 and 0xff.
> +class AddRegEscapeFilter : public ModRMFilter {
> +private:
> +  uint8_t ModRM;
> +public:
> +  /// Constructor
> +  ///
> +  /// @modRM        - The ModR/M byte that corresponds to the first register
> +  ///                 in the register set.
> +  AddRegEscapeFilter(uint8_t modRM) : ModRM(modRM) {
> +  }
> +
> +  /// accepts       - True for the eight values ModRM, ModRM + 1, ...,
> +  ///                 ModRM + 7.  The comparison is done in int, so the upper
> +  ///                 bound does not wrap around at 0xff.
> +  bool accepts(uint8_t modRM) const {
> +    const int first = ModRM;
> +    return first <= modRM && modRM < first + 8;
> +  }
> +};
> +
> +/// ExtendedFilter - Filter for extended opcodes, which are distinguished by
> +///   both the mod field [bits 7-6] and the nnn field [bits 5-3].
> +class ExtendedFilter : public ModRMFilter {
> +private:
> +  bool R;
> +  uint8_t NNN;
> +public:
> +  /// Constructor
> +  ///
> +  /// @r            - True if the mod field must be 11; false if it must be
> +  ///                 anything other than 11.  The name is explained at
> +  ///                 ModFilter.
> +  /// @nnn          - The required value of the nnn field.
> +  ExtendedFilter(bool r, uint8_t nnn) :
> +    ModRMFilter(),
> +    R(r),
> +    NNN(nnn) {
> +  }
> +
> +  /// accepts       - True when the mod field matches R and the nnn field
> +  ///                 equals NNN.
> +  bool accepts(uint8_t modRM) const {
> +    const bool modIs11 = (modRM & 0xc0) == 0xc0;
> +    const uint8_t nnn = (modRM & 0x38) >> 3;
> +    return R == modIs11 && nnn == NNN;
> +  }
> +};
> +
> +/// ExactFilter - Accepts exactly one ModR/M byte value.  Needed for the
> +///   occasional extended opcode (such as VMCALL or MONITOR) that is
> +///   identified by the complete ModR/M byte.
> +class ExactFilter : public ModRMFilter {
> +private:
> +  uint8_t ModRM;
> +public:
> +  /// Constructor
> +  ///
> +  /// @modRM        - The one ModR/M byte value that is accepted.
> +  ExactFilter(uint8_t modRM) :
> +    ModRMFilter(),
> +    ModRM(modRM) {
> +  }
> +
> +  /// accepts       - True only if modRM equals the stored byte.
> +  bool accepts(uint8_t modRM) const { return ModRM == modRM; }
> +};
> +
> +} // namespace X86Disassembler
> +
> +} // namespace llvm
> +
> +#endif
> \ No newline at end of file
>
> Added: llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp?rev=91749&view=auto
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp (added)
> +++ llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp Fri Dec 18 20:59:52 2009
> @@ -0,0 +1,959 @@
> +//===- X86RecognizableInstr.cpp - Disassembler instruction spec --*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file is part of the X86 Disassembler Emitter.
> +// It contains the implementation of a single recognizable instruction.
> +// Documentation for the disassembler emitter in general can be found in
> +//  X86DisassemblerEmitter.h.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "X86DisassemblerShared.h"
> +#include "X86RecognizableInstr.h"
> +#include "X86ModRMFilters.h"
> +
> +#include "llvm/Support/ErrorHandling.h"
> +
> +#include <string>
> +
> +using namespace llvm;
> +
> +// A clone of X86 since we can't depend on something that is generated.
> +namespace X86Local {
> +  // Instruction forms.  These are compared against the value read from an
> +  // instruction record's "FormBits" field.  For the MRMnr / MRMnm groups the
> +  // code in this file recovers n as (Form - MRM0r) or (Form - MRM0m), so the
> +  // eight values of each group must stay consecutive.
> +  enum {
> +    Pseudo      = 0,
> +    RawFrm      = 1,
> +    AddRegFrm   = 2,
> +    MRMDestReg  = 3,
> +    MRMDestMem  = 4,
> +    MRMSrcReg   = 5,
> +    MRMSrcMem   = 6,
> +    MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19,
> +    MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23,
> +    MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27,
> +    MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31,
> +    MRMInitReg  = 32
> +  };
> +
> +  // Prefix classes.  These are compared against the value read from an
> +  // instruction record's "Prefix" field.  D8..DF must stay consecutive: the
> +  // escape opcode byte is computed as 0xd8 + (Prefix - D8).
> +  enum {
> +    TB  = 1,
> +    REP = 2,
> +    D8 = 3, D9 = 4, DA = 5, DB = 6,
> +    DC = 7, DD = 8, DE = 9, DF = 10,
> +    XD = 11,  XS = 12,
> +    T8 = 13,  TA = 14
> +  };
> +}
> +
> +// X-macro lists of opcode bytes (written as bare hex digits).  Each list is
> +// expanded by first defining EXTENSION_TABLE(n); emitDecodePath() defines it
> +// as `case 0x##n:` to turn a list into a run of case labels.
> +//
> +// ONE_BYTE_EXTENSION_TABLES - Opcodes for which emitDecodePath() chooses an
> +//   ExtendedFilter (keyed on the instruction's MRMnr/MRMnm form) when the
> +//   instruction has none of the prefix classes handled explicitly there.
> +#define ONE_BYTE_EXTENSION_TABLES \
> +  EXTENSION_TABLE(80)             \
> +  EXTENSION_TABLE(81)             \
> +  EXTENSION_TABLE(82)             \
> +  EXTENSION_TABLE(83)             \
> +  EXTENSION_TABLE(8f)             \
> +  EXTENSION_TABLE(c0)             \
> +  EXTENSION_TABLE(c1)             \
> +  EXTENSION_TABLE(c6)             \
> +  EXTENSION_TABLE(c7)             \
> +  EXTENSION_TABLE(d0)             \
> +  EXTENSION_TABLE(d1)             \
> +  EXTENSION_TABLE(d2)             \
> +  EXTENSION_TABLE(d3)             \
> +  EXTENSION_TABLE(f6)             \
> +  EXTENSION_TABLE(f7)             \
> +  EXTENSION_TABLE(fe)             \
> +  EXTENSION_TABLE(ff)
> +
> +// TWO_BYTE_EXTENSION_TABLES - The same idea for instructions whose prefix
> +//   class is TB, XD or XS (handled as two-byte opcodes in emitDecodePath()).
> +#define TWO_BYTE_EXTENSION_TABLES \
> +  EXTENSION_TABLE(00)             \
> +  EXTENSION_TABLE(01)             \
> +  EXTENSION_TABLE(18)             \
> +  EXTENSION_TABLE(71)             \
> +  EXTENSION_TABLE(72)             \
> +  EXTENSION_TABLE(73)             \
> +  EXTENSION_TABLE(ae)             \
> +  EXTENSION_TABLE(b9)             \
> +  EXTENSION_TABLE(ba)             \
> +  EXTENSION_TABLE(c7)
> +
> +// TWO_BYTE_FULL_EXTENSION_TABLES - NOTE(review): no expansion of this list is
> +//   visible in this part of the file; presumably it marks two-byte opcodes
> +//   distinguished by the full ModR/M byte (cf. the ExactFilter special cases
> +//   in emitDecodePath()) - confirm before relying on it.
> +#define TWO_BYTE_FULL_EXTENSION_TABLES \
> +  EXTENSION_TABLE(01)
> +
> +
> +using namespace X86Disassembler;
> +
> +/// needsModRMForDecode - Tells whether decoding an instruction of the given
> +///   form depends on its ModR/M byte.  For example, a MRMDestReg instruction
> +///   needs the Mod field in the ModR/M byte to be set to 0b11.
> +///
> +/// @param form - The form of the instruction.
> +/// @return     - true for the MRMDest*/MRMSrc* forms and for every MRMnr and
> +///               MRMnm form; false for all other forms.
> +static bool needsModRMForDecode(uint8_t form) {
> +  switch (form) {
> +  case X86Local::MRMDestReg:
> +  case X86Local::MRMDestMem:
> +  case X86Local::MRMSrcReg:
> +  case X86Local::MRMSrcMem:
> +    return true;
> +  default:
> +    break;
> +  }
> +
> +  const bool isMRMnr = form >= X86Local::MRM0r && form <= X86Local::MRM7r;
> +  const bool isMRMnm = form >= X86Local::MRM0m && form <= X86Local::MRM7m;
> +  return isMRMnr || isMRMnm;
> +}
> +
> +/// isRegFormat - Tells whether the given form requires the Mod field of the
> +///   ModR/M byte to be 0b11.
> +///
> +/// @param form - The form of the instruction.
> +/// @return     - true for MRMDestReg, MRMSrcReg and every MRMnr form; false
> +///               for all other forms.
> +static bool isRegFormat(uint8_t form) {
> +  switch (form) {
> +  case X86Local::MRMDestReg:
> +  case X86Local::MRMSrcReg:
> +    return true;
> +  default:
> +    break;
> +  }
> +
> +  return form >= X86Local::MRM0r && form <= X86Local::MRM7r;
> +}
> +
> +/// byteFromBitsInit - Packs a BitsInit of at most 8 bits into a uint8_t.
> +///   Useful for switch statements and the like.
> +///
> +/// @param init - A reference to the BitsInit to be decoded.
> +/// @return     - The field, with the first bit in the BitsInit as the lowest
> +///               order bit.
> +static uint8_t byteFromBitsInit(BitsInit &init) {
> +  int numBits = init.getNumBits();
> +
> +  assert(numBits <= 8 && "Field is too large for uint8_t!");
> +
> +  uint8_t value = 0;
> +  uint8_t bitMask = 0x01;
> +
> +  // Bit i of the BitsInit lands in bit i of the result.
> +  for (int bit = 0; bit < numBits; ++bit, bitMask <<= 1) {
> +    if (static_cast<BitInit*>(init.getBit(bit))->getValue())
> +      value |= bitMask;
> +  }
> +
> +  return value;
> +}
> +
> +/// byteFromRec - Reads a bits field of at most 8 bits in width from a Record,
> +///   looked up by field name.
> +///
> +/// @param rec  - The record from which to extract the value.
> +/// @param name - The name of the field in the record.
> +/// @return     - The field, as translated by byteFromBitsInit().
> +static uint8_t byteFromRec(const Record* rec, const std::string &name) {
> +  return byteFromBitsInit(*rec->getValueAsBitsInit(name));
> +}
> +
> +RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
> +                                     const CodeGenInstruction &insn,
> +                                     InstrUID uid) {
> +  UID = uid;
> +
> +  Rec = insn.TheDef;
> +  Name = Rec->getName();
> +  Spec = &tables.specForUID(UID);
> +
> +  if (!Rec->isSubClassOf("X86Inst")) {
> +    ShouldBeEmitted = false;
> +    return;
> +  }
> +
> +  Prefix   = byteFromRec(Rec, "Prefix");
> +  Opcode   = byteFromRec(Rec, "Opcode");
> +  Form     = byteFromRec(Rec, "FormBits");
> +  SegOvr   = byteFromRec(Rec, "SegOvrBits");
> +
> +  HasOpSizePrefix  = Rec->getValueAsBit("hasOpSizePrefix");
> +  HasREX_WPrefix   = Rec->getValueAsBit("hasREX_WPrefix");
> +  HasLockPrefix    = Rec->getValueAsBit("hasLockPrefix");
> +  IsCodeGenOnly    = Rec->getValueAsBit("isCodeGenOnly");
> +
> +  Name      = Rec->getName();
> +  AsmString = Rec->getValueAsString("AsmString");
> +
> +  Operands = &insn.OperandList;
> +
> +  IsSSE            = HasOpSizePrefix && (Name.find("16") == Name.npos);
> +  HasFROperands    = false;
> +
> +  ShouldBeEmitted  = true;
> +}
> +
> +/// processInstr - Runs one instruction through the emitter: builds a
> +///   RecognizableInstr for it, emits its instruction specifier and, unless
> +///   the instruction ended up marked as not to be emitted, its decode path.
> +///
> +/// @param tables - The tables to be updated.
> +/// @param insn   - The instruction to process.
> +/// @param uid    - The unique ID of the instruction.
> +void RecognizableInstr::processInstr(DisassemblerTables &tables,
> +                                   const CodeGenInstruction &insn,
> +                                   InstrUID uid)
> +{
> +  RecognizableInstr instr(tables, insn, uid);
> +
> +  // The specifier step also runs the filter, which may clear the
> +  // should-be-emitted flag checked below.
> +  instr.emitInstructionSpecifier(tables);
> +
> +  if (!instr.shouldBeEmitted())
> +    return;
> +
> +  instr.emitDecodePath(tables);
> +}
> +
> +InstructionContext RecognizableInstr::insnContext() const {
> +  InstructionContext insnContext;
> +
> +  if (Name.find("64") != Name.npos || HasREX_WPrefix) {
> +    if (HasREX_WPrefix && HasOpSizePrefix)
> +      insnContext = IC_64BIT_REXW_OPSIZE;
> +    else if (HasOpSizePrefix)
> +      insnContext = IC_64BIT_OPSIZE;
> +    else if (HasREX_WPrefix && Prefix == X86Local::XS)
> +      insnContext = IC_64BIT_REXW_XS;
> +    else if (HasREX_WPrefix && Prefix == X86Local::XD)
> +      insnContext = IC_64BIT_REXW_XD;
> +    else if (Prefix == X86Local::XD)
> +      insnContext = IC_64BIT_XD;
> +    else if (Prefix == X86Local::XS)
> +      insnContext = IC_64BIT_XS;
> +    else if (HasREX_WPrefix)
> +      insnContext = IC_64BIT_REXW;
> +    else
> +      insnContext = IC_64BIT;
> +  } else {
> +    if (HasOpSizePrefix)
> +      insnContext = IC_OPSIZE;
> +    else if (Prefix == X86Local::XD)
> +      insnContext = IC_XD;
> +    else if (Prefix == X86Local::XS)
> +      insnContext = IC_XS;
> +    else
> +      insnContext = IC;
> +  }
> +
> +  return insnContext;
> +}
> +
> +/// filter - Decides, mostly from the instruction's name and flags, how the
> +///   instruction should be treated by the emitter.
> +///
> +/// The checks run in order and the first match wins.  As used by
> +/// emitInstructionSpecifier(): FILTER_STRONG makes the instruction not be
> +/// emitted at all, FILTER_WEAK marks its specifier as filtered, and
> +/// FILTER_NORMAL leaves it untouched.
> +///
> +/// @return - FILTER_STRONG, FILTER_WEAK or FILTER_NORMAL.
> +RecognizableInstr::filter_ret RecognizableInstr::filter() const {
> +  // Filter out intrinsics
> +
> +  if (!Rec->isSubClassOf("X86Inst"))
> +    return FILTER_STRONG;
> +
> +  // Filter out pseudo-instructions and codegen-only instructions.
> +  if (Form == X86Local::Pseudo ||
> +      IsCodeGenOnly)
> +    return FILTER_STRONG;
> +
> +  // Filter out instructions with a LOCK prefix;
> +  //   prefer forms that do not have the prefix
> +  if (HasLockPrefix)
> +    return FILTER_WEAK;
> +
> +  // Filter out artificial instructions
> +  // (recognized by substrings of the instruction name)
> +
> +  if (Name.find("TAILJMP") != Name.npos    ||
> +     Name.find("_Int") != Name.npos       ||
> +     Name.find("_int") != Name.npos       ||
> +     Name.find("Int_") != Name.npos       ||
> +     Name.find("_NOREX") != Name.npos     ||
> +     Name.find("EH_RETURN") != Name.npos  ||
> +     Name.find("V_SET") != Name.npos      ||
> +     Name.find("LOCK_") != Name.npos      ||
> +     Name.find("WIN") != Name.npos)
> +    return FILTER_STRONG;
> +
> +  // Special cases.
> +
> +  // Any name that contains PCMPISTRI / PCMPESTRI but is not exactly that
> +  // name is weakly filtered.
> +  if (Name.find("PCMPISTRI") != Name.npos && Name != "PCMPISTRI")
> +    return FILTER_WEAK;
> +  if (Name.find("PCMPESTRI") != Name.npos && Name != "PCMPESTRI")
> +    return FILTER_WEAK;
> +
> +  if (Name.find("MOV") != Name.npos && Name.find("r0") != Name.npos)
> +    return FILTER_WEAK;
> +  // Names containing MOVZ but not MOVZX.
> +  if (Name.find("MOVZ") != Name.npos && Name.find("MOVZX") == Name.npos)
> +    return FILTER_WEAK;
> +  if (Name.find("Fs") != Name.npos)
> +    return FILTER_WEAK;
> +  // Individual instructions that are weakly filtered by exact name.
> +  if (Name == "MOVLPDrr"          ||
> +      Name == "MOVLPSrr"          ||
> +      Name == "PUSHFQ"            ||
> +      Name == "BSF16rr"           ||
> +      Name == "BSF16rm"           ||
> +      Name == "BSR16rr"           ||
> +      Name == "BSR16rm"           ||
> +      Name == "MOVSX16rm8"        ||
> +      Name == "MOVSX16rr8"        ||
> +      Name == "MOVZX16rm8"        ||
> +      Name == "MOVZX16rr8"        ||
> +      Name == "PUSH32i16"         ||
> +      Name == "PUSH64i16"         ||
> +      Name == "MOVPQI2QImr"       ||
> +      Name == "MOVSDmr"           ||
> +      Name == "MOVSDrm"           ||
> +      Name == "MOVSSmr"           ||
> +      Name == "MOVSSrm"           ||
> +      Name == "MMX_MOVD64rrv164"  ||
> +      Name == "CRC32m16"          ||
> +      Name == "MOV64ri64i32"      ||
> +      Name == "CRC32r16")
> +    return FILTER_WEAK;
> +
> +  // Filter out instructions with segment override prefixes.
> +  // They're too messy to handle now and we'll special case them if needed.
> +
> +  if (SegOvr)
> +    return FILTER_STRONG;
> +
> +  // Filter out instructions that can't be printed.
> +
> +  if (AsmString.size() == 0)
> +    return FILTER_STRONG;
> +
> +  // Filter out instructions with subreg operands.
> +
> +  if (AsmString.find("subreg") != AsmString.npos)
> +    return FILTER_STRONG;
> +
> +  assert(Form != X86Local::MRMInitReg &&
> +         "FORMAT_MRMINITREG instruction not skipped");
> +
> +  // NOTE(review): in the code visible here HasFROperands is only ever
> +  // assigned false (in the constructor), so this check appears never to
> +  // fire; emitInstructionSpecifier() repeats the same test using a flag it
> +  // computes locally.  Confirm whether the member is set elsewhere.
> +  if (HasFROperands && Name.find("MOV") != Name.npos &&
> +     ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) ||
> +      (Name.find("to") != Name.npos)))
> +    return FILTER_WEAK;
> +
> +  return FILTER_NORMAL;
> +}
> +
> +void RecognizableInstr::handleOperand(
> +  bool optional,
> +  unsigned &operandIndex,
> +  unsigned &physicalOperandIndex,
> +  unsigned &numPhysicalOperands,
> +  unsigned *operandMapping,
> +  OperandEncoding (*encodingFromString)(const std::string&, bool hasOpSizePrefix)) {
> +  if (optional) {
> +    if (physicalOperandIndex >= numPhysicalOperands)
> +      return;
> +  } else {
> +    assert(physicalOperandIndex < numPhysicalOperands);
> +  }
> +
> +  while (operandMapping[operandIndex] != operandIndex) {
> +    Spec->operands[operandIndex].encoding = ENCODING_DUP;
> +    Spec->operands[operandIndex].type =
> +      (OperandType)(TYPE_DUP0 + operandMapping[operandIndex]);
> +    ++operandIndex;
> +  }
> +
> +  const std::string &typeName = (*Operands)[operandIndex].Rec->getName();
> +
> +  Spec->operands[operandIndex].encoding = encodingFromString(typeName,
> +                                                              HasOpSizePrefix);
> +  Spec->operands[operandIndex].type = typeFromString(typeName,
> +                                                      IsSSE,
> +                                                      HasREX_WPrefix,
> +                                                      HasOpSizePrefix);
> +
> +  ++operandIndex;
> +  ++physicalOperandIndex;
> +}
> +
> +/// emitInstructionSpecifier - Fills in this instruction's specifier (Spec):
> +///   its name, its instruction context, and the encoding and type of each
> +///   operand.
> +///
> +/// The name is always recorded.  Nothing else is done for records that are
> +/// not X86Inst subclasses or that filter() rejects with FILTER_STRONG; in
> +/// the latter case ShouldBeEmitted is also cleared.
> +///
> +/// @param tables - Not referenced in this function body.
> +void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
> +  Spec->name       = Name;
> +
> +  if (!Rec->isSubClassOf("X86Inst"))
> +    return;
> +
> +  switch (filter()) {
> +  case FILTER_WEAK:
> +    Spec->filtered = true;
> +    break;
> +  case FILTER_STRONG:
> +    ShouldBeEmitted = false;
> +    return;
> +  case FILTER_NORMAL:
> +    break;
> +  }
> +
> +  Spec->insnContext = insnContext();
> +
> +  const std::vector<CodeGenInstruction::OperandInfo> &OperandList = *Operands;
> +
> +  unsigned operandIndex;
> +  unsigned numOperands = OperandList.size();
> +  unsigned numPhysicalOperands = 0;
> +
> +  // operandMapping maps from operands in OperandList to their originals.
> +  // If operandMapping[i] != i, then the entry is a duplicate.
> +  unsigned operandMapping[X86_MAX_OPERANDS];
> +
> +  bool hasFROperands = false;
> +
> +  assert(numOperands < X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough");
> +
> +  // First pass: find tied (duplicate) operands, count the physical ones,
> +  // and note whether any operand's record name contains "FR".
> +  for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
> +    if (OperandList[operandIndex].Constraints.size()) {
> +      const std::string &constraint = OperandList[operandIndex].Constraints[0];
> +      std::string::size_type tiedToPos;
> +
> +      // A tied operand is recognized by this marker text in its first
> +      // constraint string.  The single character just before the marker is
> +      // read as the decimal index of the operand it is tied to.
> +      if ((tiedToPos = constraint.find(" << 16) | (1 << TOI::TIED_TO))")) !=
> +         constraint.npos) {
> +        tiedToPos--;
> +        operandMapping[operandIndex] = constraint[tiedToPos] - '0';
> +      } else {
> +        ++numPhysicalOperands;
> +        operandMapping[operandIndex] = operandIndex;
> +      }
> +    } else {
> +      ++numPhysicalOperands;
> +      operandMapping[operandIndex] = operandIndex;
> +    }
> +
> +    const std::string &recName = OperandList[operandIndex].Rec->getName();
> +
> +    if (recName.find("FR") != recName.npos)
> +      hasFROperands = true;
> +  }
> +
> +  // Do not emit MOV-named instructions with FR operands whose name contains
> +  // "to", or contains "2" without containing "32".
> +  if (hasFROperands && Name.find("MOV") != Name.npos &&
> +     ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) ||
> +      (Name.find("to") != Name.npos)))
> +    ShouldBeEmitted = false;
> +
> +  if (!ShouldBeEmitted)
> +    return;
> +
> +// HANDLE_OPERAND(class) consumes one required operand, translating its type
> +// name with class##EncodingFromString; HANDLE_OPTIONAL(class) does the same
> +// but tolerates the operand being absent.  Both expand to a complete
> +// statement, so uses below carry no trailing semicolon.
> +#define HANDLE_OPERAND(class)               \
> +  handleOperand(false,                      \
> +                operandIndex,               \
> +                physicalOperandIndex,       \
> +                numPhysicalOperands,        \
> +                operandMapping,             \
> +                class##EncodingFromString);
> +
> +#define HANDLE_OPTIONAL(class)              \
> +  handleOperand(true,                       \
> +                operandIndex,               \
> +                physicalOperandIndex,       \
> +                numPhysicalOperands,        \
> +                operandMapping,             \
> +                class##EncodingFromString);
> +
> +  // operandIndex should always be < numOperands
> +  operandIndex = 0;
> +  // physicalOperandIndex should always be < numPhysicalOperands
> +  unsigned physicalOperandIndex = 0;
> +
> +  // Second pass: consume the operands in the order the form dictates.
> +  // Forms without a case here (e.g. Pseudo) get no operand entries.
> +  switch (Form) {
> +  case X86Local::RawFrm:
> +    // Operand 1 (optional) is an address or immediate.
> +    // Operand 2 (optional) is an immediate.
> +    assert(numPhysicalOperands <= 2 &&
> +           "Unexpected number of operands for RawFrm");
> +    HANDLE_OPTIONAL(relocation)
> +    HANDLE_OPTIONAL(immediate)
> +    break;
> +  case X86Local::AddRegFrm:
> +    // Operand 1 is added to the opcode.
> +    // Operand 2 (optional) is an address.
> +    assert(numPhysicalOperands >= 1 && numPhysicalOperands <= 2 &&
> +           "Unexpected number of operands for AddRegFrm");
> +    HANDLE_OPERAND(opcodeModifier)
> +    HANDLE_OPTIONAL(relocation)
> +    break;
> +  case X86Local::MRMDestReg:
> +    // Operand 1 is a register operand in the R/M field.
> +    // Operand 2 is a register operand in the Reg/Opcode field.
> +    // Operand 3 (optional) is an immediate.
> +    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
> +           "Unexpected number of operands for MRMDestRegFrm");
> +    HANDLE_OPERAND(rmRegister)
> +    HANDLE_OPERAND(roRegister)
> +    HANDLE_OPTIONAL(immediate)
> +    break;
> +  case X86Local::MRMDestMem:
> +    // Operand 1 is a memory operand (possibly SIB-extended)
> +    // Operand 2 is a register operand in the Reg/Opcode field.
> +    // Operand 3 (optional) is an immediate.
> +    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
> +           "Unexpected number of operands for MRMDestMemFrm");
> +    HANDLE_OPERAND(memory)
> +    HANDLE_OPERAND(roRegister)
> +    HANDLE_OPTIONAL(immediate)
> +    break;
> +  case X86Local::MRMSrcReg:
> +    // Operand 1 is a register operand in the Reg/Opcode field.
> +    // Operand 2 is a register operand in the R/M field.
> +    // Operand 3 (optional) is an immediate.
> +    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
> +           "Unexpected number of operands for MRMSrcRegFrm");
> +    HANDLE_OPERAND(roRegister)
> +    HANDLE_OPERAND(rmRegister)
> +    HANDLE_OPTIONAL(immediate)
> +    break;
> +  case X86Local::MRMSrcMem:
> +    // Operand 1 is a register operand in the Reg/Opcode field.
> +    // Operand 2 is a memory operand (possibly SIB-extended)
> +    // Operand 3 (optional) is an immediate.
> +    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
> +           "Unexpected number of operands for MRMSrcMemFrm");
> +    HANDLE_OPERAND(roRegister)
> +    HANDLE_OPERAND(memory)
> +    HANDLE_OPTIONAL(immediate)
> +    break;
> +  case X86Local::MRM0r:
> +  case X86Local::MRM1r:
> +  case X86Local::MRM2r:
> +  case X86Local::MRM3r:
> +  case X86Local::MRM4r:
> +  case X86Local::MRM5r:
> +  case X86Local::MRM6r:
> +  case X86Local::MRM7r:
> +    // Operand 1 is a register operand in the R/M field.
> +    // Operand 2 (optional) is an immediate or relocation.
> +    assert(numPhysicalOperands <= 2 &&
> +           "Unexpected number of operands for MRMnRFrm");
> +    HANDLE_OPTIONAL(rmRegister)
> +    HANDLE_OPTIONAL(relocation)
> +    break;
> +  case X86Local::MRM0m:
> +  case X86Local::MRM1m:
> +  case X86Local::MRM2m:
> +  case X86Local::MRM3m:
> +  case X86Local::MRM4m:
> +  case X86Local::MRM5m:
> +  case X86Local::MRM6m:
> +  case X86Local::MRM7m:
> +    // Operand 1 is a memory operand (possibly SIB-extended)
> +    // Operand 2 (optional) is an immediate or relocation.
> +    assert(numPhysicalOperands >= 1 && numPhysicalOperands <= 2 &&
> +           "Unexpected number of operands for MRMnMFrm");
> +    HANDLE_OPERAND(memory)
> +    HANDLE_OPTIONAL(relocation)
> +    break;
> +  case X86Local::MRMInitReg:
> +    // Ignored.
> +    break;
> +  }
> +
> +  #undef HANDLE_OPERAND
> +  #undef HANDLE_OPTIONAL
> +}
> +
> +void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
> +  // Special cases where the LLVM tables are not complete
> +
> +#define EXACTCASE(class, name, lastbyte)         \
> +  if (Name == name) {                           \
> +    tables.setTableFields(class,                 \
> +                          insnContext(),         \
> +                          Opcode,               \
> +                          ExactFilter(lastbyte), \
> +                          UID);                 \
> +    Spec->modifierBase = Opcode;               \
> +    return;                                      \
> +  }
> +
> +  EXACTCASE(TWOBYTE, "MONITOR",  0xc8)
> +  EXACTCASE(TWOBYTE, "MWAIT",    0xc9)
> +  EXACTCASE(TWOBYTE, "SWPGS",    0xf8)
> +  EXACTCASE(TWOBYTE, "INVEPT",   0x80)
> +  EXACTCASE(TWOBYTE, "INVVPID",  0x81)
> +  EXACTCASE(TWOBYTE, "VMCALL",   0xc1)
> +  EXACTCASE(TWOBYTE, "VMLAUNCH", 0xc2)
> +  EXACTCASE(TWOBYTE, "VMRESUME", 0xc3)
> +  EXACTCASE(TWOBYTE, "VMXOFF",   0xc4)
> +
> +  if (Name == "INVLPG") {
> +    tables.setTableFields(TWOBYTE,
> +                          insnContext(),
> +                          Opcode,
> +                          ExtendedFilter(false, 7),
> +                          UID);
> +    Spec->modifierBase = Opcode;
> +    return;
> +  }
> +
> +  OpcodeType    opcodeType  = (OpcodeType)-1;
> +
> +  ModRMFilter*  filter      = NULL;
> +  uint8_t       opcodeToSet = 0;
> +
> +  switch (Prefix) {
> +  // Extended two-byte opcodes can start with f2 0f, f3 0f, or 0f
> +  case X86Local::XD:
> +  case X86Local::XS:
> +  case X86Local::TB:
> +    opcodeType = TWOBYTE;
> +
> +    switch (Opcode) {
> +#define EXTENSION_TABLE(n) case 0x##n:
> +    TWO_BYTE_EXTENSION_TABLES
> +#undef EXTENSION_TABLE
> +      switch (Form) {
> +      default:
> +        llvm_unreachable("Unhandled two-byte extended opcode");
> +      case X86Local::MRM0r:
> +      case X86Local::MRM1r:
> +      case X86Local::MRM2r:
> +      case X86Local::MRM3r:
> +      case X86Local::MRM4r:
> +      case X86Local::MRM5r:
> +      case X86Local::MRM6r:
> +      case X86Local::MRM7r:
> +        filter = new ExtendedFilter(true, Form - X86Local::MRM0r);
> +        break;
> +      case X86Local::MRM0m:
> +      case X86Local::MRM1m:
> +      case X86Local::MRM2m:
> +      case X86Local::MRM3m:
> +      case X86Local::MRM4m:
> +      case X86Local::MRM5m:
> +      case X86Local::MRM6m:
> +      case X86Local::MRM7m:
> +        filter = new ExtendedFilter(false, Form - X86Local::MRM0m);
> +        break;
> +      } // switch (Form)
> +      break;
> +    default:
> +      if (needsModRMForDecode(Form))
> +        filter = new ModFilter(isRegFormat(Form));
> +      else
> +        filter = new DumbFilter();
> +
> +      break;
> +    } // switch (opcode)
> +    opcodeToSet = Opcode;
> +    break;
> +  case X86Local::T8:
> +    opcodeType = THREEBYTE_38;
> +    if (needsModRMForDecode(Form))
> +      filter = new ModFilter(isRegFormat(Form));
> +    else
> +      filter = new DumbFilter();
> +    opcodeToSet = Opcode;
> +    break;
> +  case X86Local::TA:
> +    opcodeType = THREEBYTE_3A;
> +    if (needsModRMForDecode(Form))
> +      filter = new ModFilter(isRegFormat(Form));
> +    else
> +      filter = new DumbFilter();
> +    opcodeToSet = Opcode;
> +    break;
> +  case X86Local::D8:
> +  case X86Local::D9:
> +  case X86Local::DA:
> +  case X86Local::DB:
> +  case X86Local::DC:
> +  case X86Local::DD:
> +  case X86Local::DE:
> +  case X86Local::DF:
> +    assert(Opcode >= 0xc0 && "Unexpected opcode for an escape opcode");
> +    opcodeType = ONEBYTE;
> +    if (Form == X86Local::AddRegFrm) {
> +      Spec->modifierType = MODIFIER_MODRM;
> +      Spec->modifierBase = Opcode;
> +      filter = new AddRegEscapeFilter(Opcode);
> +    } else {
> +      filter = new EscapeFilter(true, Opcode);
> +    }
> +    opcodeToSet = 0xd8 + (Prefix - X86Local::D8);
> +    break;
> +  default:
> +    opcodeType = ONEBYTE;
> +    switch (Opcode) {
> +#define EXTENSION_TABLE(n) case 0x##n:
> +    ONE_BYTE_EXTENSION_TABLES
> +#undef EXTENSION_TABLE
> +      switch (Form) {
> +      default:
> +        llvm_unreachable("Fell through the cracks of a single-byte "
> +                         "extended opcode");
> +      case X86Local::MRM0r:
> +      case X86Local::MRM1r:
> +      case X86Local::MRM2r:
> +      case X86Local::MRM3r:
> +      case X86Local::MRM4r:
> +      case X86Local::MRM5r:
> +      case X86Local::MRM6r:
> +      case X86Local::MRM7r:
> +        filter = new ExtendedFilter(true, Form - X86Local::MRM0r);
> +        break;
> +      case X86Local::MRM0m:
> +      case X86Local::MRM1m:
> +      case X86Local::MRM2m:
> +      case X86Local::MRM3m:
> +      case X86Local::MRM4m:
> +      case X86Local::MRM5m:
> +      case X86Local::MRM6m:
> +      case X86Local::MRM7m:
> +        filter = new ExtendedFilter(false, Form - X86Local::MRM0m);
> +        break;
> +      } // switch (Form)
> +      break;
> +    case 0xd8:
> +    case 0xd9:
> +    case 0xda:
> +    case 0xdb:
> +    case 0xdc:
> +    case 0xdd:
> +    case 0xde:
> +    case 0xdf:
> +      filter = new EscapeFilter(false, Form - X86Local::MRM0m);
> +      break;
> +    default:
> +      if (needsModRMForDecode(Form))
> +        filter = new ModFilter(isRegFormat(Form));
> +      else
> +        filter = new DumbFilter();
> +      break;
> +    } // switch (Opcode)
> +    opcodeToSet = Opcode;
> +  } // switch (Prefix)
> +
> +  assert(opcodeType != (OpcodeType)-1 &&
> +         "Opcode type not set");
> +  assert(filter && "Filter not set");
> +
> +  if (Form == X86Local::AddRegFrm) {
> +    if(Spec->modifierType != MODIFIER_MODRM) {
> +      assert(opcodeToSet < 0xf9 &&
> +             "Not enough room for all ADDREG_FRM operands");
> +
> +      uint8_t currentOpcode;
> +
> +      for (currentOpcode = opcodeToSet;
> +           currentOpcode < opcodeToSet + 8;
> +           ++currentOpcode)
> +        tables.setTableFields(opcodeType,
> +                              insnContext(),
> +                              currentOpcode,
> +                              *filter,
> +                              UID);
> +
> +      Spec->modifierType = MODIFIER_OPCODE;
> +      Spec->modifierBase = opcodeToSet;
> +    } else {
> +      // modifierBase was set where MODIFIER_MODRM was set
> +      tables.setTableFields(opcodeType,
> +                            insnContext(),
> +                            opcodeToSet,
> +                            *filter,
> +                            UID);
> +    }
> +  } else {
> +    tables.setTableFields(opcodeType,
> +                          insnContext(),
> +                          opcodeToSet,
> +                          *filter,
> +                          UID);
> +
> +    Spec->modifierType = MODIFIER_NONE;
> +    Spec->modifierBase = opcodeToSet;
> +  }
> +
> +  delete filter;
> +}
> +
> +// TYPE(str, type) returns `type` from the enclosing function if the local
> +// string `s` equals `str`.  The entries are tried top to bottom, so where a
> +// string appears more than once the earliest reachable entry wins.
> +#define TYPE(str, type) if (s == str) return type;
> +/// typeFromString - Translates an operand's type name into an OperandType.
> +///
> +/// The three conditional groups at the top are consulted first and override
> +/// the general table below them for the strings they list.  An unrecognized
> +/// string is reported on errs() and treated as unreachable.
> +///
> +/// @param s               - The type name of the operand.
> +/// @param isSSE           - True if the instruction is an SSE instruction;
> +///                          GR16/GR32/GR64 then map to fixed-size register
> +///                          types.
> +/// @param hasREX_WPrefix  - True if the instruction has a REX.W prefix; GR32
> +///                          then maps to TYPE_R32.
> +/// @param hasOpSizePrefix - True if the instruction has an OpSize prefix; if
> +///                          false, GR16 and i16imm map to fixed 16-bit types.
> +/// @return                - The operand's type.
> +OperandType RecognizableInstr::typeFromString(const std::string &s,
> +                                              bool isSSE,
> +                                              bool hasREX_WPrefix,
> +                                              bool hasOpSizePrefix) {
> +  if (isSSE) {
> +    // For SSE instructions, we ignore the OpSize prefix and force operand
> +    // sizes.
> +    TYPE("GR16",              TYPE_R16)
> +    TYPE("GR32",              TYPE_R32)
> +    TYPE("GR64",              TYPE_R64)
> +  }
> +  if(hasREX_WPrefix) {
> +    // For instructions with a REX_W prefix, a declared 32-bit register encoding
> +    // is special.
> +    TYPE("GR32",              TYPE_R32)
> +  }
> +  if(!hasOpSizePrefix) {
> +    // For instructions without an OpSize prefix, a declared 16-bit register or
> +    // immediate encoding is special.
> +    TYPE("GR16",              TYPE_R16)
> +    TYPE("i16imm",            TYPE_IMM16)
> +  }
> +  // General table, reached when none of the overrides above matched.
> +  TYPE("i16mem",              TYPE_Mv)
> +  TYPE("i16imm",              TYPE_IMMv)
> +  TYPE("i16i8imm",            TYPE_IMMv)
> +  TYPE("GR16",                TYPE_Rv)
> +  TYPE("i32mem",              TYPE_Mv)
> +  TYPE("i32imm",              TYPE_IMMv)
> +  TYPE("i32i8imm",            TYPE_IMM32)
> +  TYPE("GR32",                TYPE_Rv)
> +  TYPE("i64mem",              TYPE_Mv)
> +  TYPE("i64i32imm",           TYPE_IMM64)
> +  TYPE("i64i8imm",            TYPE_IMM64)
> +  TYPE("GR64",                TYPE_R64)
> +  TYPE("i8mem",               TYPE_M8)
> +  TYPE("i8imm",               TYPE_IMM8)
> +  TYPE("GR8",                 TYPE_R8)
> +  TYPE("VR128",               TYPE_XMM128)
> +  TYPE("f128mem",             TYPE_M128)
> +  TYPE("FR64",                TYPE_XMM64)
> +  TYPE("f64mem",              TYPE_M64FP)
> +  TYPE("FR32",                TYPE_XMM32)
> +  TYPE("f32mem",              TYPE_M32FP)
> +  TYPE("RST",                 TYPE_ST)
> +  TYPE("i128mem",             TYPE_M128)
> +  TYPE("i64i32imm_pcrel",     TYPE_REL64)
> +  TYPE("i32imm_pcrel",        TYPE_REL32)
> +  TYPE("SSECC",               TYPE_IMM8)
> +  TYPE("brtarget",            TYPE_RELv)
> +  TYPE("brtarget8",           TYPE_REL8)
> +  TYPE("f80mem",              TYPE_M80FP)
> +  TYPE("lea32mem",            TYPE_M32)
> +  TYPE("lea64_32mem",         TYPE_M64)
> +  TYPE("lea64mem",            TYPE_M64)
> +  TYPE("VR64",                TYPE_MM64)
> +  TYPE("i64imm",              TYPE_IMMv)
> +  TYPE("opaque32mem",         TYPE_M1616)
> +  TYPE("opaque48mem",         TYPE_M1632)
> +  TYPE("opaque80mem",         TYPE_M1664)
> +  TYPE("opaque512mem",        TYPE_M512)
> +  TYPE("SEGMENT_REG",         TYPE_SEGMENTREG)
> +  TYPE("DEBUG_REG",           TYPE_DEBUGREG)
> +  TYPE("CONTROL_REG_32",      TYPE_CR32)
> +  TYPE("CONTROL_REG_64",      TYPE_CR64)
> +  TYPE("offset8",             TYPE_MOFFS8)
> +  TYPE("offset16",            TYPE_MOFFS16)
> +  TYPE("offset32",            TYPE_MOFFS32)
> +  TYPE("offset64",            TYPE_MOFFS64)
> +  // No entry matched: report the offending string, then abort.
> +  errs() << "Unhandled type string " << s << "\n";
> +  llvm_unreachable("Unhandled type string");
> +}
> +#undef TYPE
> +
> +#define ENCODING(str, encoding) if (s == str) return encoding;
> +OperandEncoding RecognizableInstr::immediateEncodingFromString
> +  (const std::string &s,
> +   bool hasOpSizePrefix) {
> +  if(!hasOpSizePrefix) {
> +    // For instructions without an OpSize prefix, a declared 16-bit register or
> +    // immediate encoding is special.
> +    ENCODING("i16imm",        ENCODING_IW)
> +  }
> +  ENCODING("i32i8imm",        ENCODING_IB)
> +  ENCODING("SSECC",           ENCODING_IB)
> +  ENCODING("i16imm",          ENCODING_Iv)
> +  ENCODING("i16i8imm",        ENCODING_IB)
> +  ENCODING("i32imm",          ENCODING_Iv)
> +  ENCODING("i64i32imm",       ENCODING_ID)
> +  ENCODING("i64i8imm",        ENCODING_IB)
> +  ENCODING("i8imm",           ENCODING_IB)
> +  errs() << "Unhandled immediate encoding " << s << "\n";
> +  llvm_unreachable("Unhandled immediate encoding");
> +}
> +
> +OperandEncoding RecognizableInstr::rmRegisterEncodingFromString
> +  (const std::string &s,
> +   bool hasOpSizePrefix) {
> +  ENCODING("GR16",            ENCODING_RM)
> +  ENCODING("GR32",            ENCODING_RM)
> +  ENCODING("GR64",            ENCODING_RM)
> +  ENCODING("GR8",             ENCODING_RM)
> +  ENCODING("VR128",           ENCODING_RM)
> +  ENCODING("FR64",            ENCODING_RM)
> +  ENCODING("FR32",            ENCODING_RM)
> +  ENCODING("VR64",            ENCODING_RM)
> +  errs() << "Unhandled R/M register encoding " << s << "\n";
> +  llvm_unreachable("Unhandled R/M register encoding");
> +}
> +
> +OperandEncoding RecognizableInstr::roRegisterEncodingFromString
> +  (const std::string &s,
> +   bool hasOpSizePrefix) {
> +  ENCODING("GR16",            ENCODING_REG)
> +  ENCODING("GR32",            ENCODING_REG)
> +  ENCODING("GR64",            ENCODING_REG)
> +  ENCODING("GR8",             ENCODING_REG)
> +  ENCODING("VR128",           ENCODING_REG)
> +  ENCODING("FR64",            ENCODING_REG)
> +  ENCODING("FR32",            ENCODING_REG)
> +  ENCODING("VR64",            ENCODING_REG)
> +  ENCODING("SEGMENT_REG",     ENCODING_REG)
> +  ENCODING("DEBUG_REG",       ENCODING_REG)
> +  ENCODING("CONTROL_REG_32",  ENCODING_REG)
> +  ENCODING("CONTROL_REG_64",  ENCODING_REG)
> +  errs() << "Unhandled reg/opcode register encoding " << s << "\n";
> +  llvm_unreachable("Unhandled reg/opcode register encoding");
> +}
> +
> +OperandEncoding RecognizableInstr::memoryEncodingFromString
> +  (const std::string &s,
> +   bool hasOpSizePrefix) {
> +  ENCODING("i16mem",          ENCODING_RM)
> +  ENCODING("i32mem",          ENCODING_RM)
> +  ENCODING("i64mem",          ENCODING_RM)
> +  ENCODING("i8mem",           ENCODING_RM)
> +  ENCODING("f128mem",         ENCODING_RM)
> +  ENCODING("f64mem",          ENCODING_RM)
> +  ENCODING("f32mem",          ENCODING_RM)
> +  ENCODING("i128mem",         ENCODING_RM)
> +  ENCODING("f80mem",          ENCODING_RM)
> +  ENCODING("lea32mem",        ENCODING_RM)
> +  ENCODING("lea64_32mem",     ENCODING_RM)
> +  ENCODING("lea64mem",        ENCODING_RM)
> +  ENCODING("opaque32mem",     ENCODING_RM)
> +  ENCODING("opaque48mem",     ENCODING_RM)
> +  ENCODING("opaque80mem",     ENCODING_RM)
> +  ENCODING("opaque512mem",    ENCODING_RM)
> +  errs() << "Unhandled memory encoding " << s << "\n";
> +  llvm_unreachable("Unhandled memory encoding");
> +}
> +
> +OperandEncoding RecognizableInstr::relocationEncodingFromString
> +  (const std::string &s,
> +   bool hasOpSizePrefix) {
> +  if(!hasOpSizePrefix) {
> +    // For instructions without an OpSize prefix, a declared 16-bit register or
> +    // immediate encoding is special.
> +    ENCODING("i16imm",        ENCODING_IW)
> +  }
> +  ENCODING("i16imm",          ENCODING_Iv)
> +  ENCODING("i16i8imm",        ENCODING_IB)
> +  ENCODING("i32imm",          ENCODING_Iv)
> +  ENCODING("i32i8imm",        ENCODING_IB)
> +  ENCODING("i64i32imm",       ENCODING_ID)
> +  ENCODING("i64i8imm",        ENCODING_IB)
> +  ENCODING("i8imm",           ENCODING_IB)
> +  ENCODING("i64i32imm_pcrel", ENCODING_ID)
> +  ENCODING("i32imm_pcrel",    ENCODING_ID)
> +  ENCODING("brtarget",        ENCODING_Iv)
> +  ENCODING("brtarget8",       ENCODING_IB)
> +  ENCODING("i64imm",          ENCODING_IO)
> +  ENCODING("offset8",         ENCODING_Ia)
> +  ENCODING("offset16",        ENCODING_Ia)
> +  ENCODING("offset32",        ENCODING_Ia)
> +  ENCODING("offset64",        ENCODING_Ia)
> +  errs() << "Unhandled relocation encoding " << s << "\n";
> +  llvm_unreachable("Unhandled relocation encoding");
> +}
> +
> +OperandEncoding RecognizableInstr::opcodeModifierEncodingFromString
> +  (const std::string &s,
> +   bool hasOpSizePrefix) {
> +  ENCODING("RST",             ENCODING_I)
> +  ENCODING("GR32",            ENCODING_Rv)
> +  ENCODING("GR64",            ENCODING_RO)
> +  ENCODING("GR16",            ENCODING_Rv)
> +  ENCODING("GR8",             ENCODING_RB)
> +  errs() << "Unhandled opcode modifier encoding " << s << "\n";
> +  llvm_unreachable("Unhandled opcode modifier encoding");
> +}
> +#undef ENCODING
> \ No newline at end of file
>
> Added: llvm/trunk/utils/TableGen/X86RecognizableInstr.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/X86RecognizableInstr.h?rev=91749&view=auto
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/X86RecognizableInstr.h (added)
> +++ llvm/trunk/utils/TableGen/X86RecognizableInstr.h Fri Dec 18 20:59:52 2009
> @@ -0,0 +1,237 @@
> +//===- X86RecognizableInstr.h - Disassembler instruction spec ----*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file is part of the X86 Disassembler Emitter.
> +// It contains the interface of a single recognizable instruction.
> +// Documentation for the disassembler emitter in general can be found in
> +//  X86DisassemblerEmitter.h.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef X86RECOGNIZABLEINSTR_H
> +#define X86RECOGNIZABLEINSTR_H
> +
> +#include "X86DisassemblerTables.h"
> +
> +#include "CodeGenTarget.h"
> +#include "Record.h"
> +
> +#include "llvm/System/DataTypes.h"
> +#include "llvm/ADT/SmallVector.h"
> +
> +namespace llvm {
> +
> +namespace X86Disassembler {
> +
> +/// RecognizableInstr - Encapsulates all information required to decode a single
> +///   instruction, as extracted from the LLVM instruction tables.  Has methods
> +///   to interpret the information available in the LLVM tables, and to emit the
> +///   instruction into DisassemblerTables.
> +class RecognizableInstr {
> +private:
> +  /// The opcode of the instruction, as used in an MCInst
> +  InstrUID UID;
> +  /// The record from the .td files corresponding to this instruction
> +  const Record* Rec;
> +  /// The prefix field from the record
> +  uint8_t Prefix;
> +  /// The opcode field from the record; this is the opcode used in the Intel
> +  /// encoding and therefore distinct from the UID
> +  uint8_t Opcode;
> +  /// The form field from the record
> +  uint8_t Form;
> +  /// The segment override field from the record
> +  uint8_t SegOvr;
> +  /// The hasOpSizePrefix field from the record
> +  bool HasOpSizePrefix;
> +  /// The hasREX_WPrefix field from the record
> +  bool HasREX_WPrefix;
> +  /// The hasLockPrefix field from the record
> +  bool HasLockPrefix;
> +  /// The isCodeGenOnly field from the record
> +  bool IsCodeGenOnly;
> +
> +  /// The instruction name as listed in the tables
> +  std::string Name;
> +  /// The AT&T AsmString for the instruction
> +  std::string AsmString;
> +
> +  /// Indicates whether the instruction is SSE
> +  bool IsSSE;
> +  /// Indicates whether the instruction has FR operands - MOVs with FR operands
> +  /// are typically ignored
> +  bool HasFROperands;
> +  /// Indicates whether the instruction should be emitted into the decode
> +  /// tables; regardless, it will be emitted into the instruction info table
> +  bool ShouldBeEmitted;
> +
> +  /// The operands of the instruction, as listed in the CodeGenInstruction.
> +  /// They are not one-to-one with operands listed in the MCInst; for example,
> +  /// memory operands expand to 5 operands in the MCInst
> +  const std::vector<CodeGenInstruction::OperandInfo>* Operands;
> +  /// The description of the instruction that is emitted into the instruction
> +  /// info table
> +  InstructionSpecifier* Spec;
> +
> +  /// insnContext - Returns the primary context in which the instruction is
> +  ///   valid.
> +  ///
> +  /// @return - The context in which the instruction is valid.
> +  InstructionContext insnContext() const;
> +
> +  enum filter_ret {
> +    FILTER_STRONG,    // instruction has no place in the instruction tables
> +    FILTER_WEAK,      // instruction may conflict, and should be eliminated if
> +                      // it does
> +    FILTER_NORMAL     // instruction should have high priority and generate an
> +                      // error if it conflicts with any other FILTER_NORMAL
> +                      // instruction
> +  };
> +
> +  /// filter - Determines whether the instruction should be decodable.  Some
> +  ///   instructions are pure intrinsics and use unencodable operands; many
> +  ///   synthetic instructions are duplicates of other instructions; other
> +  ///   instructions only differ in the logical way in which they are used, and
> +  ///   have the same decoding.  Because these would cause decode conflicts,
> +  ///   they must be filtered out.
> +  ///
> +  /// @return - The degree of filtering to be applied (see filter_ret).
> +  filter_ret filter() const;
> +
> +  /// typeFromString - Translates an operand type from the string provided in
> +  ///   the LLVM tables to an OperandType for use in the operand specifier.
> +  ///
> +  /// @param s              - The string, as extracted by calling Rec->getName()
> +  ///                         on a CodeGenInstruction::OperandInfo.
> +  /// @param isSSE          - Indicates whether the instruction is an SSE
> +  ///                         instruction.  For SSE instructions, register
> +  ///                         operands are fixed-size rather than being affected
> +  ///                         by the mandatory OpSize prefix.
> +  /// @param hasREX_WPrefix - Indicates whether the instruction has a REX.W
> +  ///                         prefix.  If it does, 32-bit register operands stay
> +  ///                         32-bit regardless of the operand size.
> +  /// @param hasOpSizePrefix- Indicates whether the instruction has an OpSize
> +  ///                         prefix.  If it does not, then 16-bit register
> +  ///                         operands stay 16-bit.
> +  /// @return               - The operand's type.
> +  static OperandType typeFromString(const std::string& s,
> +                                    bool isSSE,
> +                                    bool hasREX_WPrefix,
> +                                    bool hasOpSizePrefix);
> +
> +  /// immediateEncodingFromString - Translates an immediate encoding from the
> +  ///   string provided in the LLVM tables to an OperandEncoding for use in
> +  ///   the operand specifier.
> +  ///
> +  /// @param s                - See typeFromString().
> +  /// @param hasOpSizePrefix  - Indicates whether the instruction has an OpSize
> +  ///                           prefix.  If it does not, then 16-bit immediate
> +  ///                           operands stay 16-bit.
> +  /// @return                 - The operand's encoding.
> +  static OperandEncoding immediateEncodingFromString(const std::string &s,
> +                                                     bool hasOpSizePrefix);
> +
> +  /// rmRegisterEncodingFromString - Like immediateEncodingFromString, but
> +  ///   handles operands that are in the R/M field of the ModR/M byte.
> +  static OperandEncoding rmRegisterEncodingFromString(const std::string &s,
> +                                                      bool hasOpSizePrefix);
> +
> +  /// roRegisterEncodingFromString - Like immediateEncodingFromString, but
> +  ///   handles operands that are in the REG field of the ModR/M byte.
> +  static OperandEncoding roRegisterEncodingFromString(const std::string &s,
> +                                                      bool hasOpSizePrefix);
> +  static OperandEncoding memoryEncodingFromString(const std::string &s,
> +                                                  bool hasOpSizePrefix);
> +  static OperandEncoding relocationEncodingFromString(const std::string &s,
> +                                                      bool hasOpSizePrefix);
> +  static OperandEncoding opcodeModifierEncodingFromString(const std::string &s,
> +                                                          bool hasOpSizePrefix);
> +
> +  /// handleOperand - Converts a single operand from the LLVM table format to
> +  ///   the emitted table format, handling any duplicate operands it encounters
> +  ///   and then one non-duplicate.
> +  ///
> +  /// @param optional             - Determines whether to assert that the
> +  ///                               operand exists.
> +  /// @param operandIndex         - The index into the generated operand table.
> +  ///                               Incremented by this function one or more
> +  ///                               times to reflect possible duplicate
> +  ///                               operands.
> +  /// @param physicalOperandIndex - The index of the current operand into the
> +  ///                               set of non-duplicate ('physical') operands.
> +  ///                               Incremented by this function once.
> +  /// @param numPhysicalOperands  - The number of non-duplicate operands in the
> +  ///                               instructions.
> +  /// @param operandMapping       - The operand mapping, which has an entry for
> +  ///                               each operand that indicates whether it is a
> +  ///                               duplicate, and of what.
> +  void handleOperand(bool optional,
> +                     unsigned &operandIndex,
> +                     unsigned &physicalOperandIndex,
> +                     unsigned &numPhysicalOperands,
> +                     unsigned *operandMapping,
> +                     OperandEncoding (*encodingFromString)
> +                       (const std::string&,
> +                        bool hasOpSizePrefix));
> +
> +  /// shouldBeEmitted - Returns the shouldBeEmitted field.  Although filter()
> +  ///   filters out many instructions, at various points in decoding we
> +  ///   determine that the instruction should not actually be decodable.  In
> +  ///   particular, MMX MOV instructions aren't emitted, but they're only
> +  ///   identified during operand parsing.
> +  ///
> +  /// @return - true if at this point we believe the instruction should be
> +  ///   emitted; false if not.  This will return false if filter() returns false
> +  ///   once emitInstructionSpecifier() has been called.
> +  bool shouldBeEmitted() const {
> +    return ShouldBeEmitted;
> +  }
> +
> +  /// emitInstructionSpecifier - Loads the instruction specifier for the current
> +  ///   instruction into a DisassemblerTables.
> +  ///
> +  /// @arg tables - The DisassemblerTables to populate with the specifier for
> +  ///               the current instruction.
> +  void emitInstructionSpecifier(DisassemblerTables &tables);
> +
> +  /// emitDecodePath - Populates the proper fields in the decode tables
> +  ///   corresponding to the decode paths for this instruction.
> +  ///
> +  /// @arg tables - The DisassemblerTables to populate with the decode
> +  ///               information for the current instruction.
> +  void emitDecodePath(DisassemblerTables &tables) const;
> +
> +  /// Constructor - Initializes a RecognizableInstr with the appropriate fields
> +  ///   from a CodeGenInstruction.
> +  ///
> +  /// @arg tables - The DisassemblerTables that the specifier will be added to.
> +  /// @arg insn   - The CodeGenInstruction to extract information from.
> +  /// @arg uid    - The unique ID of the current instruction.
> +  RecognizableInstr(DisassemblerTables &tables,
> +                    const CodeGenInstruction &insn,
> +                    InstrUID uid);
> +public:
> +  /// processInstr - Accepts a CodeGenInstruction and loads decode information
> +  ///   for it into a DisassemblerTables if appropriate.
> +  ///
> +  /// @arg tables - The DisassemblerTables to be populated with decode
> +  ///               information.
> +  /// @arg insn   - The CodeGenInstruction to be used as a source for this
> +  ///               information.
> +  /// @arg uid    - The unique ID of the instruction.
> +  static void processInstr(DisassemblerTables &tables,
> +                           const CodeGenInstruction &insn,
> +                           InstrUID uid);
> +};
> +
> +} // namespace X86Disassembler
> +
> +} // namespace llvm
> +
> +#endif
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>