PATCH: R600/SI: Experimental assembler / inline assembly support

Matt Arsenault Matthew.Arsenault at amd.com
Fri Mar 13 10:47:38 PDT 2015


On 03/12/2015 07:45 PM, Tom Stellard wrote:
> Hi,
>
> The attached patches add experimental assembler and inline assembly
> support to the R600 backend.  Not all instructions are supported yet,
> but the most of non-image instructions should work.
>
> I've only tested with the SI encodings, Sea Islands and Volcanic Islands
> are probably less complete.
>
> -Tom
>
>
>
> 0001-R600-SI-Refactor-VOP1-instruction-defs.patch
>
>
>  From a7bf8de2f3c92e8df8b407d168b505705bbc5eea Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Sun, 22 Feb 2015 14:18:21 -0500
> Subject: [PATCH 1/3] R600/SI: Refactor VOP1 instruction defs
>
> ---
>   lib/Target/R600/SIInstrInfo.td | 19 ++++++++++++-------
>   1 file changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index b557b06..5deb957 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -845,23 +845,28 @@ class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>     field bits<9> src0;
>   }
>   
> +class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
> +  VOP1<op.SI, outs, ins, asm, []>,
> +  SIMCInstr <opName#"_e32", SISubtarget.SI>;
> +
> +class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
> +  VOP1<op.VI, outs, ins, asm, []>,
> +  SIMCInstr <opName#"_e32", SISubtarget.VI>;
> +
>   multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
>                      string opName> {
>     def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
>   
> -  def _si : VOP1<op.SI, outs, ins, asm, []>,
> -            SIMCInstr <opName#"_e32", SISubtarget.SI>;
> -  def _vi : VOP1<op.VI, outs, ins, asm, []>,
> -            SIMCInstr <opName#"_e32", SISubtarget.VI>;
> +  def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
> +
> +  def _vi : VOP1_Real_vi <opName, op, outs, ins, asm>;
>   }
>   
>   multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
>                      string opName> {
>     def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
>   
> -  def _si : VOP1<op.SI, outs, ins, asm, []>,
> -            SIMCInstr <opName#"_e32", SISubtarget.SI>;
> -  // No VI instruction. This class is for SI only.
> +  def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
>   }
>   
>   class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
> -- 1.8.1.5
>
> 0002-R600-SI-Refactor-VOP2-instruction-defs.patch
>
>
>  From f9093bf3a49e3444690ff0b1106ab66bf12da248 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Mon, 23 Feb 2015 08:35:07 -0500
> Subject: [PATCH 2/3] R600/SI: Refactor VOP2 instruction defs
>
> ---
>   lib/Target/R600/SIInstrInfo.td | 19 +++++++++++++------
>   1 file changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 5deb957..86e1082 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -877,13 +877,20 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>     let isCodeGenOnly = 1;
>   }
>   
> +class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
> +  VOP2 <op.SI, outs, ins, opName#asm, []>,
> +  SIMCInstr <opName#"_e32", SISubtarget.SI>;
> +
> +class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
> +  VOP2 <op.SI, outs, ins, opName#asm, []>,
> +  SIMCInstr <opName#"_e32", SISubtarget.VI>;
> +
>   multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
>                        string opName, string revOp> {
>     def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
>              VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
>   
> -  def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
> -            SIMCInstr <opName#"_e32", SISubtarget.SI>;
> +  def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
>   }
>   
>   multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
> @@ -891,10 +898,10 @@ multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
>     def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
>              VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
>   
> -  def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
> -            SIMCInstr <opName#"_e32", SISubtarget.SI>;
> -  def _vi : VOP2 <op.VI, outs, ins, opName#asm, []>,
> -            SIMCInstr <opName#"_e32", SISubtarget.VI>;
> +  def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
> +
> +  def _vi : VOP2_Real_vi <opName, op, outs, ins, asm>;
> +
>   }
>   
>   class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
> -- 1.8.1.5
>
> 0003-R600-SI-Initial-support-for-assembler-and-inline-ass.patch
>
>
>  From fe790ee0a1f821665a7a0efb4a138078326b8df0 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Fri, 14 Nov 2014 06:22:05 -0500
> Subject: [PATCH 3/3] R600/SI: Initial support for assembler and inline
>   assembly
>
> This is currently considered experimental, but most of the more
> commonly used instructions should work.
>
> So far only SI has been extensively tested, CI and VI probably work too,
> but may be buggy.  The current set of tests cases do not give complete
> coverage, but I think it is sufficient for an experimental assembler.
>
> See the documentation in R600Usage for more information.
> ---
>   docs/R600Usage.rst                                |   60 +-
>   lib/Target/R600/AMDGPU.td                         |   24 +-
>   lib/Target/R600/AMDGPUAsmPrinter.cpp              |   22 +
>   lib/Target/R600/AMDGPUAsmPrinter.h                |    4 +
>   lib/Target/R600/AMDGPUSubtarget.cpp               |    1 +
>   lib/Target/R600/AMDGPUSubtarget.h                 |    3 +
>   lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp     | 1100 +++++++++++++++++++--
>   lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp |    5 +-
>   lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h   |    2 +
>   lib/Target/R600/SIISelLowering.cpp                |   38 +
>   lib/Target/R600/SIISelLowering.h                  |    4 +
>   lib/Target/R600/SIInstrFormats.td                 |   18 +-
>   lib/Target/R600/SIInstrInfo.td                    |  228 ++++-
>   lib/Target/R600/SIInstructions.td                 |   15 +-
>   lib/Target/R600/SIRegisterInfo.td                 |   47 +-
>   test/MC/R600/ds-err.s                             |   23 +
>   test/MC/R600/ds.s                                 |  337 +++++++
>   test/MC/R600/mubuf.s                              |  352 +++++++
>   test/MC/R600/smrd.s                               |   32 +
>   test/MC/R600/sop1-err.s                           |   22 +
>   test/MC/R600/sop1.s                               |  174 ++++
>   test/MC/R600/sop2.s                               |  131 +++
>   test/MC/R600/sopc.s                               |    9 +
>   test/MC/R600/sopp.s                               |    3 +-
>   test/MC/R600/vop1.s                               |  182 ++++
>   test/MC/R600/vop2-err.s                           |   35 +
>   test/MC/R600/vop2.s                               |  242 +++++
>   test/MC/R600/vop3.s                               |  138 +++
>   test/MC/R600/vopc.s                               |   40 +
>   29 files changed, 3148 insertions(+), 143 deletions(-)
>   create mode 100644 test/MC/R600/ds-err.s
>   create mode 100644 test/MC/R600/ds.s
>   create mode 100644 test/MC/R600/mubuf.s
>   create mode 100644 test/MC/R600/smrd.s
>   create mode 100644 test/MC/R600/sop1-err.s
>   create mode 100644 test/MC/R600/sop1.s
>   create mode 100644 test/MC/R600/sop2.s
>   create mode 100644 test/MC/R600/sopc.s
>   create mode 100644 test/MC/R600/vop1.s
>   create mode 100644 test/MC/R600/vop2-err.s
>   create mode 100644 test/MC/R600/vop2.s
>   create mode 100644 test/MC/R600/vop3.s
>   create mode 100644 test/MC/R600/vopc.s
>
> diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst
> index 48a30c8..c3f8d7e 100644
> --- a/docs/R600Usage.rst
> +++ b/docs/R600Usage.rst
> @@ -6,22 +6,51 @@ Introduction
>   ============
>   
>   The R600 back-end provides ISA code generation for AMD GPUs, starting with
> -the R600 family up until the current Sea Islands (GCN Gen 2).
> +the R600 family up until the current Volcanic Islands (GCN Gen 3).
>   
>   
>   Assembler
>   =========
>   
> -The assembler is currently a work in progress and not yet complete.  Below
> -are the currently supported features.
> +The assembler is currently considered experimental.
> +
> +For syntax examples look in test/MC/R600.
> +
> +Below some of the currently supported features (modulo bugs).  These
> +all apply to the Southern Islands ISA, Sea Islands and Volcanic Islands
> +are also supported but may be missing some instructions and have more bugs:
> +
> +DS Instructions
> +---------------
> +All DS instructions are supported.
> +
> +MUBUF Instructions
> +------------------
> +All non-atmoic MUBUF instructions are supported.
Typo: "atmoic" should be "atomic".

> +
> +SMRD Instructions
> +-----------------
> +Only the s_load_dword* SMRD instructions are supported.
> +
> +SOP1 Instructions
> +-----------------
> +All SOP1 instructions are supported.
> +
> +SOP2 Instructions
> +-----------------
> +All SOP2 instructions are supported.
> +
> +SOPC Instructions
> +-----------------
> +All SOPC instructions are supported.
>   
>   SOPP Instructions
>   -----------------
>   
> -Unless otherwise mentioned, all SOPP instructions that with an operand
> -accept a integer operand(s) only.  No verification is performed on the
> -operands, so it is up to the programmer to be familiar with the range
> -or acceptable values.
> +Unless otherwise mentioned, all SOPP instructions that have one or more
> +operands accept integer operands only.  No verification is performed
> +on the operands, so it is up to the programmer to be familiar with the
> +range or acceptable values.
>   
>   s_waitcnt
>   ^^^^^^^^^
> @@ -41,3 +70,20 @@ wait for.
>      // Wait for vmcnt counter to be 1.
>      s_waitcnt vmcnt(1)
>   
> +VOP1, VOP2, VOP3, VOPC Instructions
> +-----------------------------------
> +
> +All 32-bit and 64-bit encodings should work.
> +
> +The assembler will automatically detect which encoding size to use for
> +VOP1, VOP2, and VOPC instructions based on the operands.  If you want to force
> +a specific encoding size, you can add an _e32 (for 32-bit encoding) or
> +_e64 (for 64-bit encoding) suffix to the instruction.  Most, but not all
> +instructions support an explicit suffix.  These are all valid assembly
> +strings:
> +
> +.. code-block:: nasm
> +
> +   v_mul_i32_i24 v1, v2, v3
> +   v_mul_i32_i24_e32 v1, v2, v3
> +   v_mul_i32_i24_e64 v1, v2, v3
> diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
> index e5d5ce2..2eb805e 100644
> --- a/lib/Target/R600/AMDGPU.td
> +++ b/lib/Target/R600/AMDGPU.td
> @@ -133,6 +133,20 @@ class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
>           !cast<string>(Value),
>           "The size of local memory in bytes">;
>   
> +def FeatureGCN : SubtargetFeature<"gcn",
> +        "IsGCN",
> +        "true",
> +        "GCN or newer GPU">;
> +
> +def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
> +        "GCN1Encoding",
> +        "true",
> +        "Encoding format for SI and CI">;
> +
> +def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
> +        "GCN3Encoding",
> +        "true",
> +        "Encoding format for VI">;
>   class SubtargetFeatureGeneration <string Value,
>                                     list<SubtargetFeature> Implies> :
>           SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
> @@ -158,15 +172,17 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
>   
>   def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
>           [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
> -         FeatureWavefrontSize64]>;
> +         FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding]>;
>   
>   def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
>           [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
> -         FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
> +         FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
> +         FeatureGCN1Encoding]>;
>   
>   def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
>           [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
> -         FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
> +         FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
> +         FeatureGCN3Encoding]>;
>   
>   //===----------------------------------------------------------------------===//
>   
> @@ -197,8 +213,10 @@ def NullALU : InstrItinClass;
>   
>   class PredicateControl {
>     Predicate SubtargetPredicate;
> +  list<Predicate> AssemblerPredicates = [];
>     list<Predicate> OtherPredicates = [];
>     list<Predicate> Predicates = !listconcat([SubtargetPredicate],
> +                                            AssemblerPredicates,
>                                               OtherPredicates);
>   }
>   
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> index 5e1b6a3..b7a48c3 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> @@ -17,6 +17,7 @@
>   //
>   
>   #include "AMDGPUAsmPrinter.h"
> +#include "InstPrinter/AMDGPUInstPrinter.h"
>   #include "AMDGPU.h"
>   #include "AMDKernelCodeT.h"
>   #include "AMDGPUSubtarget.h"
> @@ -577,3 +578,24 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
>   
>     OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header)));
>   }
> +
> +bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
> +                                       unsigned AsmVariant,
> +                                       const char *ExtraCode, raw_ostream &O) {
> +  if (ExtraCode && ExtraCode[0]) {
> +    if (ExtraCode[1] != 0)
> +      return true; // Unknown modifier.
> +
> +    switch (ExtraCode[0]) {
> +    default:
> +      // See if this is a generic print operand
> +      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
> +    case 'r':
> +      break;
> +    }
> +  }
> +
> +  AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
> +                                     *TM.getSubtargetImpl()->getRegisterInfo());
> +  return false;
> +}
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
> index 58ffb1e..824cc43 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.h
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.h
> @@ -99,6 +99,10 @@ public:
>   
>     void EmitEndOfAsmFile(Module &M) override;
>   
> +  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
> +                       unsigned AsmVariant, const char *ExtraCode,
> +                       raw_ostream &O);
> +
>   protected:
>     std::vector<std::string> DisasmLines, HexLines;
>     size_t DisasmLineMaxLen;
> diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
> index 0ead652..259224a 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.cpp
> +++ b/lib/Target/R600/AMDGPUSubtarget.cpp
> @@ -71,6 +71,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
>         EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
>         WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
>         EnableVGPRSpilling(false), SGPRInitBug(false),
> +      IsGCN(false), GCN1Encoding(false), GCN3Encoding(false),
>         FrameLowering(TargetFrameLowering::StackGrowsUp,
>                       64 * 16, // Maximum stack alignment (long16)
>                       0),
> diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
> index 403a3e4..aeb0817 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.h
> +++ b/lib/Target/R600/AMDGPUSubtarget.h
> @@ -71,6 +71,9 @@ private:
>     int LocalMemorySize;
>     bool EnableVGPRSpilling;
>     bool SGPRInitBug;
> +  bool IsGCN;
> +  bool GCN1Encoding;
> +  bool GCN3Encoding;
>   
>     AMDGPUFrameLowering FrameLowering;
>     std::unique_ptr<AMDGPUTargetLowering> TLInfo;
> diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> index 3b4ba1a..972d23f 100644
> --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> @@ -8,6 +8,8 @@
>   //===----------------------------------------------------------------------===//
>   
>   #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
> +#include "SIDefines.h"
> +#include "llvm/ADT/APFloat.h"
>   #include "llvm/ADT/SmallString.h"
>   #include "llvm/ADT/SmallVector.h"
>   #include "llvm/ADT/STLExtras.h"
> @@ -27,77 +29,108 @@
>   #include "llvm/Support/SourceMgr.h"
>   #include "llvm/Support/TargetRegistry.h"
>   #include "llvm/Support/raw_ostream.h"
> +#include "llvm/Support/Debug.h"
>   
>   using namespace llvm;
>   
>   namespace {
>   
> -class AMDGPUAsmParser : public MCTargetAsmParser {
> -  MCSubtargetInfo &STI;
> -  MCAsmParser &Parser;
> -
> -
> -  /// @name Auto-generated Match Functions
> -  /// {
> -
> -#define GET_ASSEMBLER_HEADER
> -#include "AMDGPUGenAsmMatcher.inc"
> -
> -  /// }
> -
> -public:
> -  AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
> -               const MCInstrInfo &_MII,
> -               const MCTargetOptions &Options)
> -      : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
> -    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
> -  }
> -  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
> -  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
> -                               OperandVector &Operands, MCStreamer &Out,
> -                               uint64_t &ErrorInfo,
> -                               bool MatchingInlineAsm) override;
> -  bool ParseDirective(AsmToken DirectiveID) override;
> -  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
> -  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
> -                        SMLoc NameLoc, OperandVector &Operands) override;
> -
> -  bool parseCnt(int64_t &IntVal);
> -  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
> -};
> +struct OptionalOperand;
>   
>   class AMDGPUOperand : public MCParsedAsmOperand {
>     enum KindTy {
>       Token,
> -    Immediate
> +    Immediate,
> +    Register,
> +    Expression
>     } Kind;
>   
> +  SMLoc StartLoc, EndLoc;
> +
>   public:
>     AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
>   
> +  MCContext *Ctx;
> +
> +  enum ImmTy {
> +    ImmTyNone,
> +    ImmTyDSOffset0,
> +    ImmTyDSOffset1,
> +    ImmTyGDS,
> +    ImmTyOffset,
> +    ImmTyGLC,
> +    ImmTySLC,
> +    ImmTyTFE,
> +    ImmTyClamp,
> +    ImmTyOMod
> +  };
> +
>     struct TokOp {
>       const char *Data;
>       unsigned Length;
>     };
>   
>     struct ImmOp {
> +    bool IsFPImm;
> +    ImmTy Type;
>       int64_t Val;
>     };
>   
> +  struct RegOp {
> +    unsigned RegNo;
> +    int Modifiers;
> +    const MCRegisterInfo *TRI;
> +  };
> +
>     union {
>       TokOp Tok;
>       ImmOp Imm;
> +    RegOp Reg;
> +    const MCExpr *Expr;
>     };
>   
>     void addImmOperands(MCInst &Inst, unsigned N) const {
> -    Inst.addOperand(MCOperand::CreateImm(getImm()));
> -  }
> -  void addRegOperands(MCInst &Inst, unsigned N) const {
> -    llvm_unreachable("addRegOperands");
> +    if (Imm.IsFPImm)
> +      Inst.addOperand(MCOperand::CreateFPImm(getImm()));
> +    else
> +      Inst.addOperand(MCOperand::CreateImm(getImm()));
Should this be bitcasting, and only creating immediates for fpimm? Other
parts have started assuming they will never see an fpimm. (I thought I
had already removed fpimm handling from the instprinter, but it seems to
still be there.)
>     }
> +
>     StringRef getToken() const {
>       return StringRef(Tok.Data, Tok.Length);
>     }
> +
> +  void addRegOperands(MCInst &Inst, unsigned N) const {
> +    Inst.addOperand(MCOperand::CreateReg(getReg()));
> +  }
> +
> +  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
> +    if (isReg())
> +      addRegOperands(Inst, N);
> +    else
> +      addImmOperands(Inst, N);
> +  }
> +
> +  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
> +    Inst.addOperand(MCOperand::CreateImm(Reg.Modifiers));
> +    addRegOperands(Inst, N);
> +  }
> +
> +  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
> +    if (isImm())
> +      addImmOperands(Inst, N);
> +    else {
> +      assert(isExpr());
> +      Inst.addOperand(MCOperand::CreateExpr(Expr));
> +    }
> +  }
> +
> +  bool defaultTokenHasSuffix() const {
> +    StringRef Token(Tok.Data, Tok.Length);
> +
> +    return Token.endswith("_e32") || Token.endswith("_e64");
> +  }
> +
>     bool isToken() const override {
>       return Kind == Token;
>     }
> @@ -106,52 +139,367 @@ public:
>       return Kind == Immediate;
>     }
>   
> +  bool isInlineImm() const {
> +    float F = APInt(32, Imm.Val).bitsToFloat();
> +    return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) ||
> +           (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
> +           F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0));
> +  }
What about the new 0.5 pi inline constant on VI? This should at least have
a FIXME for it. There is also BitsToFloat, rather than going through APInt.
> +
> +  bool isDSOffset0() const {
> +    assert(isImm());
> +    return Imm.Type == ImmTyDSOffset0;
> +  }
> +
> +  bool isDSOffset1() const {
> +    assert(isImm());
> +    return Imm.Type == ImmTyDSOffset1;
> +  }
> +
>     int64_t getImm() const {
>       return Imm.Val;
>     }
>   
> +  enum ImmTy getImmTy() const {
> +    assert(isImm());
> +    return Imm.Type;
> +  }
> +
>     bool isReg() const override {
> -    return false;
> +    return Kind == Register && Reg.Modifiers == -1;
> +  }
> +
> +  bool isRegWithInputMods() const {
> +    return Kind == Register && Reg.Modifiers != -1;
> +  }
> +
> +  void setModifiers(unsigned Mods) {
> +    assert(isReg());
> +    Reg.Modifiers = Mods;
>     }
>   
>     unsigned getReg() const override {
> -    return 0;
> +    return Reg.RegNo;
> +  }
> +
> +  bool isRegOrImm() const {
> +    return isReg() || isImm();
> +  }
> +
> +  bool isRegClass(unsigned RCID) const {
> +    return Reg.TRI->getRegClass(RCID).contains(getReg());
> +  }
> +
> +  bool isSCSrc32() const {
> +    return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
> +  }
> +
> +  bool isSSrc32() const {
> +    return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
> +  }
> +
> +  bool isSSrc64() const {
> +    return isImm() || isInlineImm() ||
> +           (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
> +  }
> +
> +  bool isVCSrc32() const {
> +    return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
> +  }
> +
> +  bool isVCSrc64() const {
> +    return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
> +  }
> +
> +  bool isVSrc32() const {
> +    return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
> +  }
> +
> +  bool isVSrc64() const {
> +    return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
>     }
>   
>     bool isMem() const override {
>       return false;
>     }
>   
> +  bool isExpr() const {
> +    return Kind == Expression;
> +  }
> +
> +  bool isSoppBrTarget() const {
> +    return isExpr() || isImm();
> +  }
> +
>     SMLoc getStartLoc() const override {
> -    return SMLoc();
> +    return StartLoc;
>     }
>   
>     SMLoc getEndLoc() const override {
> -    return SMLoc();
> +    return EndLoc;
>     }
>   
>     void print(raw_ostream &OS) const override { }
>   
> -  static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) {
> +  static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
> +                                                  enum ImmTy Type = ImmTyNone,
> +                                                  bool IsFPImm = false) {
>       auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
>       Op->Imm.Val = Val;
> +    Op->Imm.IsFPImm = IsFPImm;
> +    Op->Imm.Type = Type;
> +    Op->StartLoc = Loc;
> +    Op->EndLoc = Loc;
>       return Op;
>     }
>   
> -  static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) {
> +  static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc,
> +                                           bool HasExplicitEncodingSize = true) {
>       auto Res = llvm::make_unique<AMDGPUOperand>(Token);
>       Res->Tok.Data = Str.data();
>       Res->Tok.Length = Str.size();
> +    Res->StartLoc = Loc;
> +    Res->EndLoc = Loc;
>       return Res;
>     }
>   
> +  static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
> +                                                  SMLoc E,
> +                                                  const MCRegisterInfo *TRI) {
> +    auto Op = llvm::make_unique<AMDGPUOperand>(Register);
> +    Op->Reg.RegNo = RegNo;
> +    Op->Reg.TRI = TRI;
> +    Op->Reg.Modifiers = -1;
> +    Op->StartLoc = S;
> +    Op->EndLoc = E;
> +    return Op;
> +  }
> +
> +  static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) {
> +    auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
> +    Op->Expr = Expr;
> +    Op->StartLoc = S;
> +    Op->EndLoc = S;
> +    return Op;
> +  }
> +
> +  bool isDSOffset() const;
> +  bool isDSOffset01() const;
>     bool isSWaitCnt() const;
> +  bool isMubufOffset() const;
> +};
> +
> +class AMDGPUAsmParser : public MCTargetAsmParser {
> +  MCSubtargetInfo &STI;
> +  const MCInstrInfo &MII;
> +  MCAsmParser &Parser;
> +
> +  unsigned ForcedEncodingSize;
> +  /// @name Auto-generated Match Functions
> +  /// {
> +
> +#define GET_ASSEMBLER_HEADER
> +#include "AMDGPUGenAsmMatcher.inc"
> +
> +  /// }
> +
> +public:
> +  AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
> +               const MCInstrInfo &_MII,
> +               const MCTargetOptions &Options)
> +      : MCTargetAsmParser(), STI(_STI), MII(_MII), Parser(_Parser),
> +        ForcedEncodingSize(0){
> +
> +    if (!STI.getFeatureBits()) {
> +      // Set default features.
> +      STI.ToggleFeature("SOUTHERN_ISLANDS");
> +    }
> +
> +    uint64_t Features = STI.getFeatureBits();
> +    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
> +  }
> +
> +  unsigned getForcedEncodingSize() const {
> +    return ForcedEncodingSize;
> +  }
> +
> +  void setForcedEncodingSize(unsigned Size) {
> +    ForcedEncodingSize = Size;
> +  }
> +
> +  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
> +  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
> +  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
> +                               OperandVector &Operands, MCStreamer &Out,
> +                               uint64_t &ErrorInfo,
> +                               bool MatchingInlineAsm) override;
> +  bool ParseDirective(AsmToken DirectiveID) override;
> +  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
> +  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
> +                        SMLoc NameLoc, OperandVector &Operands) override;
> +
> +  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int,
> +                                          int64_t Default = 0);
> +  OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
> +                                          OperandVector &Operands,
> +                                          enum AMDGPUOperand::ImmTy ImmTy =
> +                                                      AMDGPUOperand::ImmTyNone);
> +  OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
> +                                     enum AMDGPUOperand::ImmTy ImmTy =
> +                                                      AMDGPUOperand::ImmTyNone);
> +  OperandMatchResultTy parseOptionalOps(
> +                                   const ArrayRef<OptionalOperand> &OptionalOps,
> +                                   OperandVector &Operands);
> +
> +
> +  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
> +  void cvtDS(MCInst &Inst, const OperandVector &Operands);
> +  OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands);
> +  OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands);
> +  OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands);
> +
> +  bool parseCnt(int64_t &IntVal);
> +  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
> +  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
> +
> +  void cvtMubuf(MCInst &Inst, const OperandVector &Operands);
> +  OperandMatchResultTy parseOffset(OperandVector &Operands);
> +  OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands);
> +  OperandMatchResultTy parseGLC(OperandVector &Operands);
> +  OperandMatchResultTy parseSLC(OperandVector &Operands);
> +  OperandMatchResultTy parseTFE(OperandVector &Operands);
> +
> +  OperandMatchResultTy parseDMask(OperandVector &Operands);
> +  OperandMatchResultTy parseUNorm(OperandVector &Operands);
> +  OperandMatchResultTy parseR128(OperandVector &Operands);
> +
> +  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
> +  OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands);
> +};
> +
> +struct OptionalOperand {
> +  const char *Name;
> +  AMDGPUOperand::ImmTy Type;
> +  bool IsBit;
> +  int64_t Default;
> +  bool (*ConvertResult)(int64_t&);
>   };
>   
>   }
>   
> +static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
> +  if (IsVgpr) {
> +    switch (RegWidth) {
> +      default: llvm_unreachable("Unknown register width");
> +      case 1: return AMDGPU::VGPR_32RegClassID;
> +      case 2: return AMDGPU::VReg_64RegClassID;
> +      case 3: return AMDGPU::VReg_96RegClassID;
> +      case 4: return AMDGPU::VReg_128RegClassID;
> +      case 8: return AMDGPU::VReg_256RegClassID;
> +      case 16: return AMDGPU::VReg_512RegClassID;
> +    }
> +  }
> +
> +  switch (RegWidth) {
> +    default: llvm_unreachable("Unknown register width");
> +    case 1: return AMDGPU::SGPR_32RegClassID;
> +    case 2: return AMDGPU::SGPR_64RegClassID;
> +    case 4: return AMDGPU::SReg_128RegClassID;
> +    case 8: return AMDGPU::SReg_256RegClassID;
> +    case 16: return AMDGPU::SReg_512RegClassID;
> +  }
> +}
> +
> +static unsigned getRegForName(const StringRef &RegName) {
> +
> +  return StringSwitch<unsigned>(RegName)
> +    .Case("exec", AMDGPU::EXEC)
> +    .Case("vcc", AMDGPU::VCC)
> +    .Case("flat_scr", AMDGPU::FLAT_SCR)
> +    .Case("m0", AMDGPU::M0)
> +    .Case("scc", AMDGPU::SCC)
> +    .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
> +    .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
> +    .Case("vcc_lo", AMDGPU::VCC_LO)
> +    .Case("vcc_hi", AMDGPU::VCC_HI)
> +    .Case("exec_lo", AMDGPU::EXEC_LO)
> +    .Case("exec_hi", AMDGPU::EXEC_HI)
> +    .Default(0);
> +}
> +
>   bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
> -  return true;
> +  const AsmToken Tok = Parser.getTok();
> +  StartLoc = Tok.getLoc();
> +  EndLoc = Tok.getEndLoc();
> +  const StringRef &RegName = Tok.getString();
> +  RegNo = getRegForName(RegName);
> +
> +  if (RegNo) {
> +    Parser.Lex();
> +    return false;
> +  }
> +
> +  // Match vgprs and sgprs
> +  if (RegName[0] != 's' && RegName[0] != 'v')
> +    return true;
> +
> +  bool IsVgpr = RegName[0] == 'v';
> +  unsigned RegWidth;
> +  unsigned RegIndexInClass;
> +  if (RegName.size() > 1) {
> +    // We have a 32-bit register
> +    RegWidth = 1;
> +    if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
> +      return true;
> +    Parser.Lex();
> +  } else {
> +    // We have a register greater than 32-bits.
> +
> +    int64_t RegLo, RegHi;
> +    Parser.Lex();
> +    if (getLexer().isNot(AsmToken::LBrac))
> +      return true;
> +
> +    Parser.Lex();
> +    if (getParser().parseAbsoluteExpression(RegLo))
> +      return true;
> +
> +    if (getLexer().isNot(AsmToken::Colon))
> +      return true;
> +
> +    Parser.Lex();
> +    if (getParser().parseAbsoluteExpression(RegHi))
> +      return true;
> +
> +    if (getLexer().isNot(AsmToken::RBrac))
> +      return true;
> +
> +    Parser.Lex();
> +    RegWidth = (RegHi - RegLo) + 1;
> +    if (IsVgpr) {
> +      // VGPR registers aren't aligned.
> +      RegIndexInClass = RegLo;
> +    } else {
> +      // SGPR registers are aligned.  Max alignment is 4 dwords.
> +      RegIndexInClass = RegLo / std::min(RegWidth, 4u);
> +    }
> +  }
> +
> +  const MCRegisterInfo *TRC = getContext().getRegisterInfo();
> +  unsigned RC = getRegClass(IsVgpr, RegWidth);
> +  RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
> +  return false;
> +}
> +
> +unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
> +
> +  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
> +
> +  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
> +      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
> +    return Match_InvalidOperand;
> +
> +  return Match_Success;
>   }
>   
>   
> @@ -163,22 +511,30 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
>     MCInst Inst;
>   
>     switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
> -  case Match_Success:
> -    Inst.setLoc(IDLoc);
> -    Out.EmitInstruction(Inst, STI);
> -    return false;
> -  case Match_MissingFeature:
> -    return Error(IDLoc, "instruction use requires an option to be enabled");
> -  case Match_MnemonicFail:
> -    return Error(IDLoc, "unrecognized instruction mnemonic");
> -  case Match_InvalidOperand: {
> -    if (ErrorInfo != ~0ULL) {
> -      if (ErrorInfo >= Operands.size())
> -        return Error(IDLoc, "too few operands for instruction");
> +    default: break;
> +    case Match_Success:
> +      Inst.setLoc(IDLoc);
> +      Out.EmitInstruction(Inst, STI);
> +      return false;
> +    case Match_MissingFeature:
> +      return Error(IDLoc, "missing feature");
> +
> +    case Match_MnemonicFail:
> +      return Error(IDLoc, "unrecognized instruction mnemonic");
> +
> +    case Match_InvalidOperand: {
> +      SMLoc ErrorLoc = IDLoc;
> +      if (ErrorInfo != ~0ULL) {
> +        if (ErrorInfo >= Operands.size()) {
> +          return Error(IDLoc, "too few operands for instruction");
> +        }
>   
> +        ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
Casting to a reference always looks weird, and there are a lot of these
in this patch. Why do you need to do this? Is there some reason you can't
cast the pointer type instead and use -> after that, rather than
dereferencing?
> +        if (ErrorLoc == SMLoc())
> +          ErrorLoc = IDLoc;
> +      }
> +      return Error(ErrorLoc, "invalid operand for instruction");
>       }
> -    return Error(IDLoc, "invalid operand for instruction");
> -  }
>     }
>     llvm_unreachable("Implement any new match types added!");
>   }
> @@ -187,6 +543,19 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
>     return true;
>   }
>   
> +static bool operandsHaveModifiers(const OperandVector &Operands) {
> +
> +  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
> +    const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
> +    if (Op.isRegWithInputMods())
> +      return true;
> +    if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod ||
> +                       Op.getImmTy() == AMDGPUOperand::ImmTyClamp))
> +      return true;
> +  }
> +  return false;
> +}
> +
>   AMDGPUAsmParser::OperandMatchResultTy
>   AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>   
> @@ -195,17 +564,104 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>   
>     // If we successfully parsed the operand or if there as an error parsing,
>     // we are done.
> -  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
> +  //
> +  // If we are parsing after we reach EndOfStatement then this means we
> +  // are appending default values to the Operands list.  This is only done
> +  // by custom parser, so we shouldn't continue on to the generic parsing.
> +  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
> +      getLexer().is(AsmToken::EndOfStatement))
>       return ResTy;
>   
> +  bool Negate = false, Abs = false;
> +  if (getLexer().getKind()== AsmToken::Minus) {
> +    Parser.Lex();
> +    Negate = true;
> +  }
> +
> +  if (getLexer().getKind() == AsmToken::Pipe) {
> +    Parser.Lex();
> +    Abs = true;
> +  }
> +
>     switch(getLexer().getKind()) {
>       case AsmToken::Integer: {
> +      SMLoc S = Parser.getTok().getLoc();
> +      int64_t IntVal;
> +      if (getParser().parseAbsoluteExpression(IntVal))
> +        return MatchOperand_ParseFail;
> +      APInt IntVal32(32, IntVal);
> +      if (IntVal32.getSExtValue() != IntVal) {
> +        Error(S, "invalid immediate: only 32-bit values are legal");
> +        return MatchOperand_ParseFail;
> +      }
> +
> +      IntVal = IntVal32.getSExtValue();
> +      if (Negate)
> +        IntVal *= -1;
> +      Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
> +      return MatchOperand_Success;
> +    }
> +    case AsmToken::Real: {
> +      // FIXME: We should emit an error if a double precisions floating-point
> +      // value is used.  I'm not sure the best way to detect this.
> +      SMLoc S = Parser.getTok().getLoc();
>         int64_t IntVal;
>         if (getParser().parseAbsoluteExpression(IntVal))
>           return MatchOperand_ParseFail;
> -      Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
> +
> +      APFloat F((float)APInt(64, IntVal).bitsToDouble());
You should be able to avoid using the host float cast here
> +      if (Negate)
> +        F.changeSign();
> +      Operands.push_back(
> +          AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
>         return MatchOperand_Success;
>       }
> +    case AsmToken::Identifier: {
> +      SMLoc S, E;
> +      unsigned RegNo;
> +      if (!ParseRegister(RegNo, S, E)) {
> +
> +        bool HasModifiers = operandsHaveModifiers(Operands);
> +        unsigned Modifiers = 0;
> +
> +        if (Negate)
> +          Modifiers |= 0x1;
> +
> +        if (Abs) {
> +          if (getLexer().getKind() != AsmToken::Pipe)
> +            return MatchOperand_ParseFail;
> +          Parser.Lex();
> +          Modifiers |= 0x2;
> +        }
> +
> +        if (Modifiers && !HasModifiers) {
> +          // We are adding a modifier to src1 or src2 and previous sources
> +          // don't have modifiers, so we need to go back and empty modifers
> +          // for each previous source.
> +          for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1;
> +               --PrevRegIdx) {
> +
> +            AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]);
> +            RegOp.setModifiers(0);
> +          }
> +        }
> +
> +
> +        Operands.push_back(AMDGPUOperand::CreateReg(
> +            RegNo, S, E, getContext().getRegisterInfo()));
> +
> +        if (HasModifiers || Modifiers) {
> +          AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]);
> +          RegOp.setModifiers(Modifiers);
> +
> +        }
> +     }  else {
> +      Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(),
> +                                                    S));
> +      Parser.Lex();
> +     }
> +     return MatchOperand_Success;
> +    }
>       default:
>         return MatchOperand_NoMatch;
>     }
> @@ -214,23 +670,283 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>   bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
>                                          StringRef Name,
>                                          SMLoc NameLoc, OperandVector &Operands) {
> +
> +  // Clear any forced encodings from the previous instruction.
> +  setForcedEncodingSize(0);
> +
> +  if (Name.endswith("_e64"))
> +    setForcedEncodingSize(64);
> +  else if (Name.endswith("_e32"))
> +    setForcedEncodingSize(32);
> +
>     // Add the instruction mnemonic
>     Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
>   
> -  if (getLexer().is(AsmToken::EndOfStatement))
> -    return false;
> +  while (!getLexer().is(AsmToken::EndOfStatement)) {
> +    AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
> +
> +    // Eat the comma or space if there is one.
> +    if (getLexer().is(AsmToken::Comma))
> +      Parser.Lex();
>   
> -  AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
> -  switch (Res) {
> -    case MatchOperand_Success: return false;
> -    case MatchOperand_ParseFail: return Error(NameLoc,
> -                                              "Failed parsing operand");
> -    case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand");
> +    switch (Res) {
> +      case MatchOperand_Success: break;
> +      case MatchOperand_ParseFail: return Error(getLexer().getLoc(),
> +                                                "failed parsing operand.");
> +      case MatchOperand_NoMatch: return Error(getLexer().getLoc(),
> +                                              "not a valid operand.");
> +    }
>     }
> -  return true;
> +
> +  // Once we reach end of statement, continue parsing so we can add default
> +  // values for optional arguments.
> +  AMDGPUAsmParser::OperandMatchResultTy Res;
> +  while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) {
> +    if (Res != MatchOperand_Success)
> +      return Error(getLexer().getLoc(), "failed parsing operand.");
> +  }
> +  return false;
>   }
>   
>   //===----------------------------------------------------------------------===//
> +// Utility functions
> +//===----------------------------------------------------------------------===//
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int,
> +                                    int64_t Default) {
> +
> +  // We are at the end of the statement, and this is a default argument, so
> +  // use a default value.
> +  if (getLexer().is(AsmToken::EndOfStatement)) {
> +    Int = Default;
> +    return MatchOperand_Success;
> +  }
> +
> +  switch(getLexer().getKind()) {
> +    default: return MatchOperand_NoMatch;
> +    case AsmToken::Identifier: {
> +      StringRef OffsetName = Parser.getTok().getString();
> +      if (!OffsetName.equals(Prefix))
> +        return MatchOperand_NoMatch;
> +
> +      Parser.Lex();
> +      if (getLexer().isNot(AsmToken::Colon))
> +        return MatchOperand_ParseFail;
> +
> +      Parser.Lex();
> +      if (getLexer().isNot(AsmToken::Integer))
> +        return MatchOperand_ParseFail;
> +
> +      if (getParser().parseAbsoluteExpression(Int))
> +        return MatchOperand_ParseFail;
> +      break;
> +    }
> +  }
> +  return MatchOperand_Success;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
> +                                    enum AMDGPUOperand::ImmTy ImmTy) {
> +
> +  SMLoc S = Parser.getTok().getLoc();
> +  int64_t Offset = 0;
> +
> +  AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset);
> +  if (Res != MatchOperand_Success)
> +    return Res;
> +
> +  Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy));
> +  return MatchOperand_Success;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
> +                               enum AMDGPUOperand::ImmTy ImmTy) {
> +  int64_t Bit = 0;
> +  SMLoc S = Parser.getTok().getLoc();
> +
> +  // We are at the end of the statement, and this is a default argument, so
> +  // use a default value.
> +  if (getLexer().isNot(AsmToken::EndOfStatement)) {
> +    switch(getLexer().getKind()) {
> +      case AsmToken::Identifier: {
> +        StringRef Tok = Parser.getTok().getString();
> +        if (Tok == Name) {
> +          Bit = 1;
> +          Parser.Lex();
> +        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
> +          Bit = 0;
> +          Parser.Lex();
> +        } else {
> +          return MatchOperand_NoMatch;
> +        }
> +        break;
> +      }
> +      default:
> +        return MatchOperand_NoMatch;
> +    }
> +  }
> +
> +  Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
> +  return MatchOperand_Success;
> +}
> +
> +static bool operandsHasOptionalOp(const OperandVector &Operands,
> +                                  const OptionalOperand &OOp) {
> +  for (unsigned i = 0; i < Operands.size(); i++) {
Per LLVM style, cache the end in the loop condition (`e = Operands.size()`) and use pre-increment (`++i`).
> +    const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]);
> +    if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) ||
> +        (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name))
> +      return true;
> +
> +  }
> +  return false;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
> +                                   OperandVector &Operands) {
> +  SMLoc S = Parser.getTok().getLoc();
> +  for (const OptionalOperand &Op : OptionalOps) {
> +    if (operandsHasOptionalOp(Operands, Op))
> +      continue;
> +    AMDGPUAsmParser::OperandMatchResultTy Res;
> +    int64_t Value;
> +    if (Op.IsBit) {
> +      Res = parseNamedBit(Op.Name, Operands, Op.Type);
> +      if (Res == MatchOperand_NoMatch)
> +        continue;
> +      return Res;
> +    }
> +
> +    Res = parseIntWithPrefix(Op.Name, Value, Op.Default);
> +
> +    if (Res == MatchOperand_NoMatch)
> +      continue;
> +
> +    if (Res != MatchOperand_Success)
> +      return Res;
> +
> +    if (Op.ConvertResult && !Op.ConvertResult(Value)) {
> +      return MatchOperand_ParseFail;
> +    }
> +
> +    Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type));
> +    return MatchOperand_Success;
> +  }
> +  return MatchOperand_NoMatch;
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// ds
> +//===----------------------------------------------------------------------===//
> +
> +static const OptionalOperand DSOptionalOps [] = {
> +  {"offset",  AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
> +  {"gds",     AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
> +};
> +
> +static const OptionalOperand DSOptionalOpsOff01 [] = {
> +  {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr},
> +  {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr},
> +  {"gds",     AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
> +};
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) {
> +  return parseOptionalOps(DSOptionalOps, Operands);
> +}
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) {
> +  return parseOptionalOps(DSOptionalOpsOff01, Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) {
> +  SMLoc S = Parser.getTok().getLoc();
> +  AMDGPUAsmParser::OperandMatchResultTy Res =
> +    parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
> +  if (Res == MatchOperand_NoMatch) {
> +    Operands.push_back(AMDGPUOperand::CreateImm(0, S,
> +                       AMDGPUOperand::ImmTyOffset));
> +    Res = MatchOperand_Success;
> +  }
> +  return Res;
> +}
> +
> +bool AMDGPUOperand::isDSOffset() const {
> +  return isImm() && isUInt<16>(getImm());
> +}
> +
> +bool AMDGPUOperand::isDSOffset01() const {
> +  return isImm() && isUInt<8>(getImm());
> +}
> +
> +void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
> +                                    const OperandVector &Operands) {
> +
> +  std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> +
> +  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
> +    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> +    // Add the register arguments
> +    if (Op.isReg()) {
> +      Op.addRegOperands(Inst, 1);
> +      continue;
> +    }
> +
> +    // Handle optional arguments
> +    OptionalIdx[Op.getImmTy()] = i;
> +  }
> +
> +  unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0];
> +  unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1];
> +  unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
> +
> +  ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0
> +  ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1
> +  ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
> +  Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0
> +}
> +
> +void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
> +
> +  std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> +  bool GDSOnly = false;
> +
> +  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
> +    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> +    // Add the register arguments
> +    if (Op.isReg()) {
> +      Op.addRegOperands(Inst, 1);
> +      continue;
> +    }
> +
> +    if (Op.isToken() && Op.getToken() == "gds") {
> +      GDSOnly = true;
> +      continue;
> +    }
> +
> +    // Handle optional arguments
> +    OptionalIdx[Op.getImmTy()] = i;
> +  }
> +
> +  unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
> +  ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset
> +
> +  if (!GDSOnly) {
> +    unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
> +    ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
> +  }
> +  Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0
> +}
> +
> +
> +//===----------------------------------------------------------------------===//
>   // s_waitcnt
>   //===----------------------------------------------------------------------===//
>   
> @@ -284,6 +1000,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
>     // expcnt  [6:4]
>     // lgkmcnt [10:8]
>     int64_t CntVal = 0x77f;
> +  SMLoc S = Parser.getTok().getLoc();
>   
>     switch(getLexer().getKind()) {
>       default: return MatchOperand_ParseFail;
> @@ -300,7 +1017,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
>         } while(getLexer().isNot(AsmToken::EndOfStatement));
>         break;
>     }
> -  Operands.push_back(AMDGPUOperand::CreateImm(CntVal));
> +  Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
>     return MatchOperand_Success;
>   }
>   
> @@ -308,6 +1025,247 @@ bool AMDGPUOperand::isSWaitCnt() const {
>     return isImm();
>   }
>   
> +//===----------------------------------------------------------------------===//
> +// sopp branch targets
> +//===----------------------------------------------------------------------===//
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
> +  SMLoc S = Parser.getTok().getLoc();
> +
> +  switch (getLexer().getKind()) {
> +    default: return MatchOperand_ParseFail;
> +    case AsmToken::Integer: {
> +      int64_t Imm;
> +      if (getParser().parseAbsoluteExpression(Imm))
> +        return MatchOperand_ParseFail;
> +      Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
> +      return MatchOperand_Success;
> +    }
> +
> +    case AsmToken::Identifier:
> +      Operands.push_back(AMDGPUOperand::CreateExpr(
> +          MCSymbolRefExpr::Create(getContext().GetOrCreateSymbol(
> +                                  Parser.getTok().getString()), getContext()), S));
> +      Parser.Lex();
> +      return MatchOperand_Success;
> +  }
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// mubuf
> +//===----------------------------------------------------------------------===//
> +
> +static const OptionalOperand MubufOptionalOps [] = {
> +  {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
> +  {"glc",    AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
> +  {"slc",    AMDGPUOperand::ImmTySLC, true, 0, nullptr},
> +  {"tfe",    AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
> +};
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) {
> +  return parseOptionalOps(MubufOptionalOps, Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseOffset(OperandVector &Operands) {
> +  return parseIntWithPrefix("offset", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseGLC(OperandVector &Operands) {
> +  return parseNamedBit("glc", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseSLC(OperandVector &Operands) {
> +  return parseNamedBit("slc", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseTFE(OperandVector &Operands) {
> +  return parseNamedBit("tfe", Operands);
> +}
> +
> +bool AMDGPUOperand::isMubufOffset() const {
> +  return isImm() && isUInt<12>(getImm());
> +}
> +
> +void AMDGPUAsmParser::cvtMubuf(MCInst &Inst,
> +                               const OperandVector &Operands) {
> +  unsigned i = 1;
> +
> +  std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> +
> +  for (unsigned e = Operands.size(); i != e; ++i) {
This loop condition looks weird. You don't seem to be using `i` after 
the loop, so its declaration should move into the `for` statement.
> +    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> +    // Add the register arguments
> +    if (Op.isReg()) {
> +      Op.addRegOperands(Inst, 1);
> +      continue;
> +    }
> +
> +    // Handle the case where soffset is an immediate
> +    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
> +      Op.addImmOperands(Inst, 1);
> +      continue;
> +    }
> +
> +    // Handle tokens like 'offen' which are sometimes hard-coded into the
> +    // asm string.  There are no MCInst operands for these.
> +    if (Op.isToken()) {
> +      continue;
> +    }
> +    assert(Op.isImm());
> +
> +    // Handle optional arguments
> +    OptionalIdx[Op.getImmTy()] = i;
> +  }
> +
> +  assert(OptionalIdx.size() == 4);
> +
> +  unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
> +  unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC];
> +  unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC];
> +  unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE];
> +
> +  ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1);
> +  ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1);
> +  ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1);
> +  ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1);
Is this defaulting Offset/GLC/SLC/TFE to 1? Shouldn't these be 0?
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// mimg
> +//===----------------------------------------------------------------------===//
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDMask(OperandVector &Operands) {
> +  return parseIntWithPrefix("dmask", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseUNorm(OperandVector &Operands) {
> +  return parseNamedBit("unorm", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseR128(OperandVector &Operands) {
> +  return parseNamedBit("r128", Operands);
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// vop3
> +//===----------------------------------------------------------------------===//
> +
> +static bool ConvertOmodMul(int64_t &Mul) {
> +  if (Mul != 1 && Mul != 2 && Mul != 4)
> +    return false;
> +
> +  Mul >>= 1;
> +  return true;
> +}
> +
> +static bool ConvertOmodDiv(int64_t &Div) {
> +  if (Div == 1) {
> +    Div = 0;
> +    return true;
> +  }
> +
> +  if (Div == 2) {
> +    Div = 3;
> +    return true;
> +  }
> +
> +  return false;
> +}
> +
> +static const OptionalOperand VOP3OptionalOps [] = {
> +  {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr},
> +  {"mul",   AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul},
> +  {"div",   AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv},
> +};
> +
> +static bool isVOP3(OperandVector &Operands) {
> +  if (operandsHaveModifiers(Operands))
> +    return true;
> +
> +  AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]);
> +
> +  if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID))
> +    return true;
> +
> +  if (Operands.size() >= 5)
> +    return true;
> +
> +  if (Operands.size() > 3) {
> +    AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]);
> +    if (Src1Op.getReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) ||
> +                            Src1Op.isRegClass(AMDGPU::SReg_64RegClassID)))
> +      return true;
> +  }
> +  return false;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
> +
> +  // The value returned by this function may change after parsing
> +  // an operand so store the original value here.
> +  bool HasModifiers = operandsHaveModifiers(Operands);
> +
> +  bool IsVOP3 = isVOP3(Operands);
> +  if (HasModifiers || IsVOP3 ||
> +      getLexer().isNot(AsmToken::EndOfStatement) ||
> +      getForcedEncodingSize() == 64) {
> +
> +    AMDGPUAsmParser::OperandMatchResultTy Res =
> +        parseOptionalOps(VOP3OptionalOps, Operands);
> +
> +    if (!HasModifiers && Res == MatchOperand_Success) {
> +      // We have added a modifier operation, so we need to make sure all
> +      // previous register operans have modifiers
Typo: operans

> +      for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
> +        AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
> +        if (Op.isReg())
> +          Op.setModifiers(0);
> +      }
> +    }
> +    return Res;
> +  }
> +  return MatchOperand_NoMatch;
> +}
> +
> +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
> +  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
> +  unsigned i = 2;
> +
> +  std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> +
> +  if (operandsHaveModifiers(Operands)) {
> +    for (unsigned e = Operands.size(); i != e; ++i) {
> +      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> +      if (Op.isRegWithInputMods()) {
> +        ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2);
> +        continue;
> +      }
> +      OptionalIdx[Op.getImmTy()] = i;
> +    }
> +
> +    unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp];
> +    unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod];
> +
> +    ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1);
> +    ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1);
> +  } else {
> +    for (unsigned e = Operands.size(); i != e; ++i)
> +      ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1);
> +  }
> +}
> +
>   /// Force static initialization.
>   extern "C" void LLVMInitializeR600AsmParser() {
>     RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> index d62fd3f..11d7219 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -123,7 +123,8 @@ void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
>       O << " tfe";
>   }
>   
> -void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
> +void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
> +                                        const MCRegisterInfo &MRI) {
>     switch (reg) {
>     case AMDGPU::VCC:
>       O << "vcc";
> @@ -293,7 +294,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
>         break;
>   
>       default:
> -      printRegOperand(Op.getReg(), O);
> +      printRegOperand(Op.getReg(), O, MRI);
>         break;
>       }
>     } else if (Op.isImm()) {
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> index 5289718..b82e388 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> @@ -30,6 +30,8 @@ public:
>     static const char *getRegisterName(unsigned RegNo);
>   
>     void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
> +  static void printRegOperand(unsigned RegNo, raw_ostream &O,
> +                              const MCRegisterInfo &MRI);
>   
>   private:
>     void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index af38c94..9148970 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -32,6 +32,7 @@
>   #include "llvm/CodeGen/SelectionDAG.h"
>   #include "llvm/IR/Function.h"
>   #include "llvm/ADT/SmallString.h"
> +#include "llvm/Support/Debug.h"
>   
>   using namespace llvm;
>   
> @@ -2082,3 +2083,40 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
>     return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
>                               cast<RegisterSDNode>(VReg)->getReg(), VT);
>   }
> +
> +//===----------------------------------------------------------------------===//
> +//                         SI Inline Assembly Support
> +//===----------------------------------------------------------------------===//
> +
> +std::pair<unsigned, const TargetRegisterClass *>
> +SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
> +                                               const std::string &Constraint,
StringRef operand instead?
> +                                               MVT VT) const {
> +  dbgs() << "Constraint = " << Constraint << "\n";
> +  dbgs() << "VT = " << EVT(VT).getEVTString() << "\n";
Leftover debug printing
> +  if (Constraint == "r") {
> +    switch(VT.SimpleTy) {
> +      default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
> +      case MVT::i64:
> +        return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
> +      case MVT::i32:
> +        return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
> +    }
> +  }
> +
> +  if (Constraint.size() > 1) {
> +    const TargetRegisterClass *RC = nullptr;
> +    if (Constraint[1] == 'v') {
> +      RC = &AMDGPU::VGPR_32RegClass;
> +    } else if (Constraint[1] == 's') {
> +      RC = &AMDGPU::SGPR_32RegClass;
> +    }
> +
> +    if (RC) {
> +      unsigned Idx = std::atoi(Constraint.substr(2).c_str());
> +      if (Idx < RC->getNumRegs())
> +        return std::make_pair(RC->getRegister(Idx), RC);
> +    }
> +  }
> +  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
> +}
> diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
> index 92f5847..168de4c 100644
> --- a/lib/Target/R600/SIISelLowering.h
> +++ b/lib/Target/R600/SIISelLowering.h
> @@ -113,6 +113,10 @@ public:
>     MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
>                                     SDLoc DL,
>                                     SDValue Ptr) const;
> +
> +  std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
> +                                   const TargetRegisterInfo *TRI,
> +                                   const std::string &Constraint, MVT VT) const;
>   };
>   
>   } // End namespace llvm
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 4167590..fa04d23 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -130,6 +130,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
>     let AddedComplexity = -1000;
>   
>     let VOP3 = 1;
> +  let VALU = 1;
> +
> +  let AsmMatchConverter = "cvtVOP3";
> +  let isCodeGenOnly = 0;
> +
>     int Size = 8;
>   }
>   
> @@ -208,6 +213,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
>     let mayLoad = 0;
>     let mayStore = 0;
>     let hasSideEffects = 0;
> +  let isCodeGenOnly = 0;
>     let SALU = 1;
>     let SOP1 = 1;
>   }
> @@ -218,6 +224,7 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
>     let mayLoad = 0;
>     let mayStore = 0;
>     let hasSideEffects = 0;
> +  let isCodeGenOnly = 0;
>     let SALU = 1;
>     let SOP2 = 1;
>   
> @@ -233,6 +240,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
>     let hasSideEffects = 0;
>     let SALU = 1;
>     let SOPC = 1;
> +  let isCodeGenOnly = 0;
>   
>     let UseNamedOperandTable = 1;
>   }
> @@ -550,10 +558,14 @@ let Uses = [EXEC] in {
>   
>   class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
>       VOP1Common <outs, ins, asm, pattern>,
> -    VOP1e<op>;
> +    VOP1e<op> {
> +  let isCodeGenOnly = 0;
> +}
>   
>   class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
> -    VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
> +    VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
> +  let isCodeGenOnly = 0;
> +}
>   
>   class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
>       VOPCCommon <ins, asm, pattern>, VOPCe <op>;
> @@ -586,6 +598,7 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
>     let mayStore = 1;
>   
>     let hasSideEffects = 0;
> +  let AsmMatchConverter = "cvtDS";
>     let SchedRW = [WriteLDS];
>   }
>   
> @@ -598,6 +611,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
>   
>     let hasSideEffects = 0;
>     let UseNamedOperandTable = 1;
> +  let AsmMatchConverter = "cvtMubuf";
>     let SchedRW = [WriteVMEM];
>   }
>   
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 86e1082..b2a414a 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -6,6 +6,15 @@
>   // License. See LICENSE.TXT for details.
>   //
>   //===----------------------------------------------------------------------===//
> +def isSICI : Predicate<
> +  "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
> +  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
> +>, AssemblerPredicate<"FeatureGCN1Encoding">;
> +def isCI : Predicate<"Subtarget->getGeneration() "
> +                      ">= AMDGPUSubtarget::SEA_ISLANDS">;
> +def isVI : Predicate <
> +  "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
> +  AssemblerPredicate<"FeatureGCN3Encoding">;
>   
>   class vop {
>     field bits<9> SI3;
> @@ -233,14 +242,88 @@ def FRAMEri32 : Operand<iPTR> {
>     let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
>   }
>   
> +def SoppBrTarget : AsmOperandClass {
> +  let Name = "SoppBrTarget";
> +  let ParserMethod = "parseSOppBrTarget";
> +}
> +
>   def sopp_brtarget : Operand<OtherVT> {
>     let EncoderMethod = "getSOPPBrEncoding";
>     let OperandType = "OPERAND_PCREL";
> +  let ParserMatchClass = SoppBrTarget;
>   }
>   
>   include "SIInstrFormats.td"
>   include "VIInstrFormats.td"
>   
> +def MubufOffsetMatchClass : AsmOperandClass {
> +  let Name = "MubufOffset";
> +  let ParserMethod = "parseMubufOptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +class DSOffsetBaseMatchClass <string parser> : AsmOperandClass {
> +  let Name = "DSOffset"#parser;
> +  let ParserMethod = parser;
> +  let RenderMethod = "addImmOperands";
> +  let PredicateMethod = "isDSOffset";
> +}
> +
> +def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">;
> +def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">;
> +
> +def DSOffset01MatchClass : AsmOperandClass {
> +  let Name = "DSOffset1";
> +  let ParserMethod = "parseDSOff01OptionalOps";
> +  let RenderMethod = "addImmOperands";
> +  let PredicateMethod = "isDSOffset01";
> +}
> +
> +class GDSBaseMatchClass <string parser> : AsmOperandClass {
> +  let Name = "GDS"#parser;
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = parser;
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">;
> +def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
> +
> +def GLCMatchClass : AsmOperandClass {
> +  let Name = "GLC";
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = "parseMubufOptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +def SLCMatchClass : AsmOperandClass {
> +  let Name = "SLC";
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = "parseMubufOptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +def TFEMatchClass : AsmOperandClass {
> +  let Name = "TFE";
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = "parseMubufOptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +def OModMatchClass : AsmOperandClass {
> +  let Name = "OMod";
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = "parseVOP3OptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +def ClampMatchClass : AsmOperandClass {
> +  let Name = "Clamp";
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = "parseVOP3OptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
>   let OperandType = "OPERAND_IMMEDIATE" in {
>   
>   def offen : Operand<i1> {
> @@ -254,35 +337,52 @@ def addr64 : Operand<i1> {
>   }
>   def mbuf_offset : Operand<i16> {
>     let PrintMethod = "printMBUFOffset";
> +  let ParserMatchClass = MubufOffsetMatchClass;
>   }
> -def ds_offset : Operand<i16> {
> +class ds_offset_base <AsmOperandClass mc> : Operand<i16> {
>     let PrintMethod = "printDSOffset";
> +  let ParserMatchClass = mc;
>   }
> +def ds_offset : ds_offset_base <DSOffsetMatchClass>;
> +def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>;
> +
>   def ds_offset0 : Operand<i8> {
>     let PrintMethod = "printDSOffset0";
> +  let ParserMatchClass = DSOffset01MatchClass;
>   }
>   def ds_offset1 : Operand<i8> {
>     let PrintMethod = "printDSOffset1";
> +  let ParserMatchClass = DSOffset01MatchClass;
>   }
> -def gds : Operand <i1> {
> +class gds_base <AsmOperandClass mc> : Operand <i1> {
>     let PrintMethod = "printGDS";
> +  let ParserMatchClass = mc;
>   }
> +def gds : gds_base <GDSMatchClass>;
> +
> +def gds01 : gds_base <GDS01MatchClass>;
> +
>   def glc : Operand <i1> {
>     let PrintMethod = "printGLC";
> +  let ParserMatchClass = GLCMatchClass;
>   }
>   def slc : Operand <i1> {
>     let PrintMethod = "printSLC";
> +  let ParserMatchClass = SLCMatchClass;
>   }
>   def tfe : Operand <i1> {
>     let PrintMethod = "printTFE";
> +  let ParserMatchClass = TFEMatchClass;
>   }
>   
>   def omod : Operand <i32> {
>     let PrintMethod = "printOModSI";
> +  let ParserMatchClass = OModMatchClass;
>   }
>   
>   def ClampMod : Operand <i1> {
>     let PrintMethod = "printClampSI";
> +  let ParserMatchClass = ClampMatchClass;
>   }
>   
>   } // End OperandType = "OPERAND_IMMEDIATE"
> @@ -391,12 +491,18 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
>   class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
>     SOP1 <outs, ins, asm, []>,
>     SOP1e <op.SI>,
> -  SIMCInstr<opName, SISubtarget.SI>;
> +  SIMCInstr<opName, SISubtarget.SI> {
> +  let isCodeGenOnly = 0;
> +  let AssemblerPredicates = [isSICI];
> +}
>   
>   class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
>     SOP1 <outs, ins, asm, []>,
>     SOP1e <op.VI>,
> -  SIMCInstr<opName, SISubtarget.VI>;
> +  SIMCInstr<opName, SISubtarget.VI> {
> +  let isCodeGenOnly = 0;
> +  let AssemblerPredicates = [isVI];
> +}
>   
>   multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm,
>                      list<dag> pattern> {
> @@ -472,12 +578,16 @@ class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
>   class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
>     SOP2<outs, ins, asm, []>,
>     SOP2e<op.SI>,
> -  SIMCInstr<opName, SISubtarget.SI>;
> +  SIMCInstr<opName, SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +}
>   
>   class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
>     SOP2<outs, ins, asm, []>,
>     SOP2e<op.VI>,
> -  SIMCInstr<opName, SISubtarget.VI>;
> +  SIMCInstr<opName, SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +}
>   
>   multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
>     def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
> @@ -539,12 +649,18 @@ class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
>   class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
>     SOPK <outs, ins, asm, []>,
>     SOPKe <op.SI>,
> -  SIMCInstr<opName, SISubtarget.SI>;
> +  SIMCInstr<opName, SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +  let isCodeGenOnly = 0;
> +}
>   
>   class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
>     SOPK <outs, ins, asm, []>,
>     SOPKe <op.VI>,
> -  SIMCInstr<opName, SISubtarget.VI>;
> +  SIMCInstr<opName, SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +  let isCodeGenOnly = 0;
> +}
>   
>   multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
>     def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0),
> @@ -583,13 +699,17 @@ class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
>                       string asm> :
>     SMRD <outs, ins, asm, []>,
>     SMRDe <op, imm>,
> -  SIMCInstr<opName, SISubtarget.SI>;
> +  SIMCInstr<opName, SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +}
>   
>   class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
>                       string asm> :
>     SMRD <outs, ins, asm, []>,
>     SMEMe_vi <op, imm>,
> -  SIMCInstr<opName, SISubtarget.VI>;
> +  SIMCInstr<opName, SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +}
>   
>   multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
>                      string asm, list<dag> pattern> {
> @@ -628,8 +748,14 @@ multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
>   def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
>     let PrintMethod = "printOperandAndMods";
>   }
> +
> +def InputModsMatchClass : AsmOperandClass {
> +  let Name = "RegWithInputMods";
> +}
> +
>   def InputModsNoDefault : Operand <i32> {
>     let PrintMethod = "printOperandAndMods";
> +  let ParserMatchClass = InputModsMatchClass;
>   }
>   
>   class getNumSrcArgs<ValueType Src1, ValueType Src2> {
> @@ -837,7 +963,8 @@ class AtomicNoRet <string noRetOp, bit isRet> {
>   class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>     VOP1Common <outs, ins, "", pattern>,
>     VOP <opName>,
> -  SIMCInstr <opName#"_e32", SISubtarget.NONE> {
> +  SIMCInstr <opName#"_e32", SISubtarget.NONE>,
> +  MnemonicAlias<opName#"_e32", opName> {
>     let isPseudo = 1;
>     let isCodeGenOnly = 1;
>   
> @@ -872,18 +999,23 @@ multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
>   class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>     VOP2Common <outs, ins, "", pattern>,
>     VOP <opName>,
> -  SIMCInstr<opName#"_e32", SISubtarget.NONE> {
> +  SIMCInstr<opName#"_e32", SISubtarget.NONE>,
> +  MnemonicAlias<opName#"_e32", opName> {
>     let isPseudo = 1;
>     let isCodeGenOnly = 1;
>   }
>   
>   class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
>     VOP2 <op.SI, outs, ins, opName#asm, []>,
> -  SIMCInstr <opName#"_e32", SISubtarget.SI>;
> +  SIMCInstr <opName#"_e32", SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +}
>   
>   class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
>     VOP2 <op.SI, outs, ins, opName#asm, []>,
> -  SIMCInstr <opName#"_e32", SISubtarget.VI>;
> +  SIMCInstr <opName#"_e32", SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +}
>   
>   multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
>                        string opName, string revOp> {
> @@ -929,7 +1061,8 @@ class VOP3DisableModFields <bit HasSrc0Mods,
>   class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>     VOP3Common <outs, ins, "", pattern>,
>     VOP <opName>,
> -  SIMCInstr<opName#"_e64", SISubtarget.NONE> {
> +  SIMCInstr<opName#"_e64", SISubtarget.NONE>,
> +  MnemonicAlias<opName#"_e64", opName> {
>     let isPseudo = 1;
>     let isCodeGenOnly = 1;
>   }
> @@ -937,22 +1070,30 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>   class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
>     VOP3Common <outs, ins, asm, []>,
>     VOP3e <op>,
> -  SIMCInstr<opName#"_e64", SISubtarget.SI>;
> +  SIMCInstr<opName#"_e64", SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +}
>   
>   class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
>     VOP3Common <outs, ins, asm, []>,
>     VOP3e_vi <op>,
> -  SIMCInstr <opName#"_e64", SISubtarget.VI>;
> +  SIMCInstr <opName#"_e64", SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +}
>   
>   class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
>     VOP3Common <outs, ins, asm, []>,
>     VOP3be <op>,
> -  SIMCInstr<opName#"_e64", SISubtarget.SI>;
> +  SIMCInstr<opName#"_e64", SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +}
>   
>   class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
>     VOP3Common <outs, ins, asm, []>,
>     VOP3be_vi <op>,
> -  SIMCInstr <opName#"_e64", SISubtarget.VI>;
> +  SIMCInstr <opName#"_e64", SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +}
>   
>   multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
>                      string opName, int NumSrcArgs, bit HasMods = 1> {
> @@ -1093,12 +1234,16 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
>     }
>   
>     def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
> -            SIMCInstr <opName, SISubtarget.SI>;
> +            SIMCInstr <opName, SISubtarget.SI> {
> +            let AssemblerPredicates = [isSICI];
> +  }
>   
>     def _vi : VOP3Common <outs, ins, asm, []>,
>               VOP3e_vi <op.VI3>,
>               VOP3DisableFields <1, 0, 0>,
> -            SIMCInstr <opName, SISubtarget.VI>;
> +            SIMCInstr <opName, SISubtarget.VI> {
> +            let AssemblerPredicates = [isVI];
> +  }
>   }
>   
>   multiclass VOP1_Helper <vop1 op, string opName, dag outs,
> @@ -1251,7 +1396,8 @@ let isCodeGenOnly = 0 in {
>   class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>     VOPCCommon <ins, "", pattern>,
>     VOP <opName>,
> -  SIMCInstr<opName#"_e32", SISubtarget.NONE> {
> +  SIMCInstr<opName#"_e32", SISubtarget.NONE>,
> +  MnemonicAlias<opName#"_e32", opName> {
>     let isPseudo = 1;
>     let isCodeGenOnly = 1;
>   }
> @@ -1498,7 +1644,9 @@ class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
>   class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
>     DS <outs, ins, asm, []>,
>     DSe <op>,
> -  SIMCInstr <opName, SISubtarget.SI>;
> +  SIMCInstr <opName, SISubtarget.SI> {
> +  let isCodeGenOnly = 0;
> +}
>   
>   class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
>     DS <outs, ins, asm, []>,
> @@ -1512,6 +1660,7 @@ class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm
>     bits<16> offset;
>     let offset0 = offset{7-0};
>     let offset1 = offset{15-8};
> +  let isCodeGenOnly = 0;
>   }
>   
>   class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
> @@ -1539,12 +1688,12 @@ multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
>   multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
>     dag outs = (outs rc:$vdst),
>     dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
> -                 gds:$gds, M0Reg:$m0),
> +                 gds01:$gds, M0Reg:$m0),
>     string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
>   
>     def "" : DS_Pseudo <opName, outs, ins, []>;
>   
> -  let data0 = 0, data1 = 0 in {
> +  let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in {
>       def _si : DS_Real_si <op, opName, outs, ins, asm>;
>       def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
>     }
> @@ -1568,12 +1717,12 @@ multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
>   multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
>     dag outs = (outs),
>     dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
> -              ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0),
> +              ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds, M0Reg:$m0),
>     string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
>   
>     def "" : DS_Pseudo <opName, outs, ins, []>;
>   
> -  let vdst = 0 in {
> +  let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in {
>       def _si : DS_Real_si <op, opName, outs, ins, asm>;
>       def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
>     }
> @@ -1647,7 +1796,7 @@ multiclass DS_0A_RET <bits<8> op, string opName,
>   
>   multiclass DS_1A_RET_GDS <bits<8> op, string opName,
>     dag outs = (outs VGPR_32:$vdst),
> -  dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
> +  dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset, M0Reg:$m0),
>     string asm = opName#" $vdst, $addr"#"$offset gds"> {
>   
>     def "" : DS_Pseudo <opName, outs, ins, []>;
> @@ -1756,6 +1905,20 @@ class mubuf <bits<7> si, bits<7> vi = si> {
>     field bits<7> VI = vi;
>   }
>   
> +let isCodeGenOnly = 0 in {
> +
> +class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> +  MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
> +  let lds  = 0;
> +}
> +
> +} // End let isCodeGenOnly = 0
> +
> +class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> +  MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
> +  let lds = 0;
> +}
> +
>   class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
>     bit IsAddr64 = is_addr64;
>     string OpName = NAME # suffix;
> @@ -1799,7 +1962,7 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
>     def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
>              MUBUFAddr64Table <0>;
>   
> -  let addr64 = 0 in {
> +  let addr64 = 0, isCodeGenOnly = 0 in {
>       def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
>     }
>   
> @@ -1812,7 +1975,7 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
>     def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
>              MUBUFAddr64Table <1>;
>   
> -  let addr64 = 1 in {
> +  let addr64 = 1, isCodeGenOnly = 0 in {
>       def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
>     }
>   
> @@ -1820,11 +1983,6 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
>     // for VI appropriately.
>   }
>   
> -class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> -  MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
> -  let lds = 0;
> -}
> -
>   multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins,
>                                   string asm, list<dag> pattern, bit is_return> {
>   
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 5f02a31..45abebc 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -27,16 +27,8 @@ def SendMsgImm : Operand<i32> {
>   }
>   
>   def isGCN : Predicate<"Subtarget->getGeneration() "
> -                      ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
> -def isSICI : Predicate<
> -  "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
> -  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
> ->;
> -def isCI : Predicate<"Subtarget->getGeneration() "
> -                      ">= AMDGPUSubtarget::SEA_ISLANDS">;
> -def isVI : Predicate <
> -  "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS"
> ->;
> +                      ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
> +            AssemblerPredicate<"FeatureGCN">;
>   
>   def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
>   
> @@ -240,9 +232,9 @@ defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
>   >;
>   } // End Defs = [SCC]
>   
> -defm S_CSELECT_B32 : SOP2_SELECT_32 <sop2<0x0a>, "s_cselect_b32", []>;
>   
>   let Uses = [SCC] in {
> +  defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>;
>     defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>;
>   } // End Uses = [SCC]
>   
> @@ -1653,7 +1645,6 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
>     VOP_F32_F32_I32, AMDGPUldexp
>   >;
>   
> -
>   defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
>     VOP_I32_F32_I32>; // TODO: set "Uses = dst"
>   
> diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
> index 7bb5dc2..f289014 100644
> --- a/lib/Target/R600/SIRegisterInfo.td
> +++ b/lib/Target/R600/SIRegisterInfo.td
> @@ -66,7 +66,7 @@ foreach Index = 0-255 in {
>   //===----------------------------------------------------------------------===//
>   
>   // SGPR 32-bit registers
> -def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
> +def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
>                               (add (sequence "SGPR%u", 0, 101))>;
>   
>   // SGPR 64-bit registers
> @@ -113,7 +113,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
>                                  (add (decimate (shl SGPR_32, 15), 4))]>;
>   
>   // VGPR 32-bit registers
> -def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
> +def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
>                               (add (sequence "VGPR%u", 0, 255))>;
>   
>   // VGPR 64-bit registers
> @@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
>   //  Register classes used as source and destination
>   //===----------------------------------------------------------------------===//
>   
> +class RegImmMatcher<string name> : AsmOperandClass {
> +  let Name = name;
> +  let RenderMethod = "addRegOrImmOperands";
> +}
> +
>   // Special register classes for predicates and the M0 register
>   def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
>     let CopyCost = -1; // Theoretically it is possible to read from SCC,
> @@ -180,7 +185,7 @@ def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
>   def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
>   
>   // Register class for all scalar registers (SGPRs + Special Registers)
> -def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
> +def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
>     (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
>   >;
>   
> @@ -227,15 +232,21 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
>   //  SSrc_* Operands with an SGPR or a 32-bit immediate
>   //===----------------------------------------------------------------------===//
>   
> -def SSrc_32 : RegImmOperand<SReg_32>;
> +def SSrc_32 : RegImmOperand<SReg_32> {
> +  let ParserMatchClass = RegImmMatcher<"SSrc32">;
> +}
>   
> -def SSrc_64 : RegImmOperand<SReg_64>;
> +def SSrc_64 : RegImmOperand<SReg_64> {
> +  let ParserMatchClass = RegImmMatcher<"SSrc64">;
> +}
>   
>   //===----------------------------------------------------------------------===//
>   //  SCSrc_* Operands with an SGPR or a inline constant
>   //===----------------------------------------------------------------------===//
>   
> -def SCSrc_32 : RegInlineOperand<SReg_32>;
> +def SCSrc_32 : RegInlineOperand<SReg_32> {
> +  let ParserMatchClass = RegImmMatcher<"SCSrc32">;
> +}
>   
>   //===----------------------------------------------------------------------===//
>   //  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
> @@ -245,14 +256,30 @@ def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
>   
>   def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
>   
> -def VSrc_32 : RegImmOperand<VS_32>;
> +def VSrc_32 : RegisterOperand<VS_32> {
> +  let OperandNamespace = "AMDGPU";
> +  let OperandType = "OPERAND_REG_IMM32";
> +  let ParserMatchClass = RegImmMatcher<"VSrc32">;
> +}
>   
> -def VSrc_64 : RegImmOperand<VS_64>;
> +def VSrc_64 : RegisterOperand<VS_64> {
> +  let OperandNamespace = "AMDGPU";
> +  let OperandType = "OPERAND_REG_IMM32";
> +  let ParserMatchClass = RegImmMatcher<"VSrc64">;
> +}
>   
>   //===----------------------------------------------------------------------===//
>   //  VCSrc_* Operands with an SGPR, VGPR or an inline constant
>   //===----------------------------------------------------------------------===//
>   
> -def VCSrc_32 : RegInlineOperand<VS_32>;
> +def VCSrc_32 : RegisterOperand<VS_32> {
> +  let OperandNamespace = "AMDGPU";
> +  let OperandType = "OPERAND_REG_INLINE_C";
> +  let ParserMatchClass = RegImmMatcher<"VCSrc32">;
> +}
>   
> -def VCSrc_64 : RegInlineOperand<VS_64>;
> +def VCSrc_64 : RegisterOperand<VS_64> {
> +  let OperandNamespace = "AMDGPU";
> +  let OperandType = "OPERAND_REG_INLINE_C";
> +  let ParserMatchClass = RegImmMatcher<"VCSrc64">;
> +}
> diff --git a/test/MC/R600/ds-err.s b/test/MC/R600/ds-err.s
> new file mode 100644
> index 0000000..52c2740
> --- /dev/null
> +++ b/test/MC/R600/ds-err.s
> @@ -0,0 +1,23 @@
> +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
> +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
> +
> +// offset too big
> +// CHECK: invalid operand for instruction
> +ds_add_u32 v2, v4 offset:1000000000
> +
> +// offset0 twice
> +// CHECK:  error: not a valid operand.
> +ds_write2_b32 v2, v4, v6 offset0:4 offset0:8
> +
> +// offset1 twice
> +// CHECK:  error: not a valid operand.
> +ds_write2_b32 v2, v4, v6 offset1:4 offset1:8
> +
> +// offset0 too big
> +// CHECK: invalid operand for instruction
> +ds_write2_b32 v2, v4, v6 offset0:1000000000
> +
> +// offset1 too big
> +// CHECK: invalid operand for instruction
> +ds_write2_b32 v2, v4, v6 offset1:1000000000
> +
> diff --git a/test/MC/R600/ds.s b/test/MC/R600/ds.s
> new file mode 100644
> index 0000000..e077760
> --- /dev/null
> +++ b/test/MC/R600/ds.s
> @@ -0,0 +1,337 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI  -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Checks for 16-bit Offsets
> +//===----------------------------------------------------------------------===//
> +
> +ds_add_u32 v2, v4 offset:16
> +// CHECK: ds_add_u32 v2, v4 offset:16 ; encoding: [0x10,0x00,0x00,0xd8,0x02,0x04,0x00,0x00]
> +
> +//===----------------------------------------------------------------------===//
> +// Checks for 2 8-bit Offsets
> +//===----------------------------------------------------------------------===//
> +
> +ds_write2_b32 v2, v4, v6 offset0:4
> +// CHECK: ds_write2_b32 v2, v4, v6 offset0:4 offset1:0 ; encoding: [0x04,0x00,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write2_b32 v2, v4, v6 offset0:4 offset1:8
> +// CHECK: ds_write2_b32 v2, v4, v6 offset0:4 offset1:8 ; encoding: [0x04,0x08,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write2_b32 v2, v4, v6 offset1:8
> +// CHECK: ds_write2_b32 v2, v4, v6 offset0:0 offset1:8 ; encoding: [0x00,0x08,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_read2_b32 v[8:9], v2 offset0:4
> +// CHECK: ds_read2_b32 v[8:9], v2 offset0:4 offset1:0 ; encoding: [0x04,0x00,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b32 v[8:9], v2 offset0:4 offset1:8
> +// CHECK: ds_read2_b32 v[8:9], v2 offset0:4 offset1:8 ; encoding: [0x04,0x08,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b32 v[8:9], v2 offset1:8
> +// CHECK: ds_read2_b32 v[8:9], v2 offset0:0 offset1:8 ; encoding: [0x00,0x08,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +ds_add_u32 v2, v4
> +// CHECK: ds_add_u32 v2, v4 ; encoding: [0x00,0x00,0x00,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_sub_u32 v2, v4
> +// CHECK: ds_sub_u32 v2, v4 ; encoding: [0x00,0x00,0x04,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_rsub_u32 v2, v4
> +// CHECK: ds_rsub_u32 v2, v4 ; encoding: [0x00,0x00,0x08,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_inc_u32 v2, v4
> +// CHECK: ds_inc_u32 v2, v4 ; encoding: [0x00,0x00,0x0c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_dec_u32 v2, v4
> +// CHECK: ds_dec_u32 v2, v4 ; encoding: [0x00,0x00,0x10,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_min_i32 v2, v4
> +// CHECK: ds_min_i32 v2, v4 ; encoding: [0x00,0x00,0x14,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_max_i32 v2, v4
> +// CHECK: ds_max_i32 v2, v4 ; encoding: [0x00,0x00,0x18,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_min_u32 v2, v4
> +// CHECK: ds_min_u32 v2, v4 ; encoding: [0x00,0x00,0x1c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_max_u32 v2, v4
> +// CHECK: ds_max_u32 v2, v4 ; encoding: [0x00,0x00,0x20,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_and_b32 v2, v4
> +// CHECK: ds_and_b32 v2, v4 ; encoding: [0x00,0x00,0x24,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_or_b32 v2, v4
> +// CHECK: ds_or_b32 v2, v4 ; encoding: [0x00,0x00,0x28,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_xor_b32 v2, v4
> +// CHECK: ds_xor_b32 v2, v4 ; encoding: [0x00,0x00,0x2c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_mskor_b32 v2, v4, v6
> +// CHECK: ds_mskor_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x30,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write_b32 v2, v4
> +// CHECK: ds_write_b32 v2, v4 ; encoding: [0x00,0x00,0x34,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_write2_b32 v2, v4, v6
> +// CHECK: ds_write2_b32 v2, v4, v6 offset0:0 offset1:0 ; encoding: [0x00,0x00,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write2st64_b32 v2, v4, v6
> +// CHECK: ds_write2st64_b32 v2, v4, v6 offset0:0 offset1:0 ; encoding: [0x00,0x00,0x3c,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_b32 v2, v4, v6
> +// CHECK: ds_cmpst_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x40,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_f32 v2, v4, v6
> +// CHECK: ds_cmpst_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x44,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_min_f32 v2, v4, v6
> +// CHECK: ds_min_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x48,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_max_f32 v2, v4, v6
> +// CHECK: ds_max_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x4c,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_gws_init v2 gds
> +// CHECK: ds_gws_init v2 gds ; encoding: [0x00,0x00,0x66,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_sema_v v2 gds
> +// CHECK: ds_gws_sema_v v2 gds ; encoding: [0x00,0x00,0x6a,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_sema_br v2 gds
> +// CHECK: ds_gws_sema_br v2 gds ; encoding: [0x00,0x00,0x6e,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_sema_p v2 gds
> +// CHECK: ds_gws_sema_p v2 gds ; encoding: [0x00,0x00,0x72,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_barrier v2 gds
> +// CHECK: ds_gws_barrier v2 gds ; encoding: [0x00,0x00,0x76,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_write_b8 v2, v4
> +// CHECK: ds_write_b8 v2, v4 ; encoding: [0x00,0x00,0x78,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_write_b16 v2, v4
> +// CHECK: ds_write_b16 v2, v4 ; encoding: [0x00,0x00,0x7c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_add_rtn_u32 v8, v2, v4
> +// CHECK: ds_add_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x80,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_sub_rtn_u32 v8, v2, v4
> +// CHECK: ds_sub_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x84,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_rsub_rtn_u32 v8, v2, v4
> +// CHECK: ds_rsub_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x88,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_inc_rtn_u32 v8, v2, v4
> +// CHECK: ds_inc_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x8c,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_dec_rtn_u32 v8, v2, v4
> +// CHECK: ds_dec_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x90,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_i32 v8, v2, v4
> +// CHECK: ds_min_rtn_i32 v8, v2, v4 ; encoding: [0x00,0x00,0x94,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_i32 v8, v2, v4
> +// CHECK: ds_max_rtn_i32 v8, v2, v4 ; encoding: [0x00,0x00,0x98,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_u32 v8, v2, v4
> +// CHECK: ds_min_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x9c,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_u32 v8, v2, v4
> +// CHECK: ds_max_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0xa0,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_and_rtn_b32 v8, v2, v4
> +// CHECK: ds_and_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xa4,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_or_rtn_b32 v8, v2, v4
> +// CHECK: ds_or_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xa8,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_xor_rtn_b32 v8, v2, v4
> +// CHECK: ds_xor_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xac,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_mskor_rtn_b32 v8, v2, v4, v6
> +// CHECK: ds_mskor_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xb0,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg_rtn_b32 v8, v2, v4
> +// CHECK: ds_wrxchg_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xb4,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6
> +// CHECK: ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xb8,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg2st64_rtn_b32 v[8:9] v2, v4, v6
> +// CHECK: ds_wrxchg2st64_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xbc,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_b32 v8, v2, v4, v6
> +// CHECK: ds_cmpst_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc0,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_f32 v8, v2, v4, v6
> +// CHECK: ds_cmpst_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc4,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_min_rtn_f32 v8, v2, v4, v6
> +// CHECK: ds_min_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc8,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_max_rtn_f32 v8, v2, v4, v6
> +// CHECK: ds_max_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xcc,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_swizzle_b32 v8, v2
> +// CHECK: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_b32 v8, v2
> +// CHECK: ds_read_b32 v8, v2 ; encoding: [0x00,0x00,0xd8,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b32 v[8:9], v2
> +// CHECK: ds_read2_b32 v[8:9], v2 offset0:0 offset1:0 ; encoding: [0x00,0x00,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2st64_b32 v[8:9], v2
> +// CHECK: ds_read2st64_b32 v[8:9], v2 offset0:0 offset1:0 ; encoding: [0x00,0x00,0xe0,0xd8,0x02,0x00,0x00,0x08]
Why are these 0 offsets being printed? I thought those were supposed to be skipped when the offset is zero.
> +
> +ds_read_i8 v8, v2
> +// CHECK: ds_read_i8 v8, v2 ; encoding: [0x00,0x00,0xe4,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_u8 v8, v2
> +// CHECK: ds_read_u8 v8, v2 ; encoding: [0x00,0x00,0xe8,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_i16 v8, v2
> +// CHECK: ds_read_i16 v8, v2 ; encoding: [0x00,0x00,0xec,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_u16 v8, v2
> +// CHECK: ds_read_u16 v8, v2 ; encoding: [0x00,0x00,0xf0,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_consume v8
> +// CHECK: ds_consume v8 ; encoding: [0x00,0x00,0xf4,0xd8,0x00,0x00,0x00,0x08]
> +
> +ds_append v8
> +// CHECK: ds_append v8 ; encoding: [0x00,0x00,0xf8,0xd8,0x00,0x00,0x00,0x08]
> +
> +ds_ordered_count v8, v2 gds
> +// CHECK: ds_ordered_count v8, v2 gds ; encoding: [0x00,0x00,0xfe,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_add_u64 v2, v[4:5]
> +// CHECK: ds_add_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x00,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_sub_u64 v2, v[4:5]
> +// CHECK: ds_sub_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x04,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_rsub_u64 v2, v[4:5]
> +// CHECK: ds_rsub_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x08,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_inc_u64 v2, v[4:5]
> +// CHECK: ds_inc_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x0c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_dec_u64 v2, v[4:5]
> +// CHECK: ds_dec_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x10,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_min_i64 v2, v[4:5]
> +// CHECK: ds_min_i64 v2, v[4:5] ; encoding: [0x00,0x00,0x14,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_max_i64 v2, v[4:5]
> +// CHECK: ds_max_i64 v2, v[4:5] ; encoding: [0x00,0x00,0x18,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_min_u64 v2, v[4:5]
> +// CHECK: ds_min_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x1c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_max_u64 v2, v[4:5]
> +// CHECK: ds_max_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x20,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_and_b64 v2, v[4:5]
> +// CHECK: ds_and_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x24,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_or_b64 v2, v[4:5]
> +// CHECK: ds_or_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x28,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_xor_b64 v2, v[4:5]
> +// CHECK: ds_xor_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x2c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_mskor_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_mskor_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x30,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_write_b64 v2, v[4:5]
> +// CHECK: ds_write_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x34,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_write2_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_write2_b64 v2, v[4:5], v[6:7] offset0:0 offset1:0 ; encoding: [0x00,0x00,0x38,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_write2st64_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_write2st64_b64 v2, v[4:5], v[6:7] offset0:0 offset1:0 ; encoding: [0x00,0x00,0x3c,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x40,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_f64 v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_f64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x44,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_min_f64 v2, v[4:5]
> +// CHECK: ds_min_f64 v2, v[4:5] ; encoding: [0x00,0x00,0x48,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_max_f64 v2, v[4:5]
> +// CHECK: ds_max_f64 v2, v[4:5] ; encoding: [0x00,0x00,0x4c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_add_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_add_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x80,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_sub_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_sub_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x84,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_rsub_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_rsub_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x88,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_inc_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_inc_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x8c,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_dec_rtn_u64 v[8:9] v2, v[4:5]
> +// CHECK: ds_dec_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x90,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_i64 v[8:9], v2, v[4:5]
> +// CHECK: ds_min_rtn_i64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x94,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_i64 v[8:9], v2, v[4:5]
> +// CHECK: ds_max_rtn_i64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x98,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_min_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x9c,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_max_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa0,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_and_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_and_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa4,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_or_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_or_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa8,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_xor_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_xor_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xac,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_mskor_rtn_b64 v[8:9], v2, v[4:5], v[6:7]
> +// CHECK: ds_mskor_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xb0,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_wrxchg_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xb4,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
> +// CHECK: ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xb8,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
> +// CHECK: ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xbc,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xc0,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_f64 v[8:9], v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_rtn_f64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xc4,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_min_rtn_f64 v[8:9], v2, v[4:5]
> +// CHECK: ds_min_rtn_f64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xc8,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_f64 v[8:9], v2, v[4:5]
> +// CHECK: ds_max_rtn_f64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xcc,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_read_b64 v[8:9], v2
> +// CHECK: ds_read_b64 v[8:9], v2 ; encoding: [0x00,0x00,0xd8,0xd9,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b64 v[8:11], v2
> +// CHECK: ds_read2_b64 v[8:11], v2 offset0:0 offset1:0 ; encoding: [0x00,0x00,0xdc,0xd9,0x02,0x00,0x00,0x08]
> +
> +ds_read2st64_b64 v[8:11], v2
> +// CHECK: ds_read2st64_b64 v[8:11], v2 offset0:0 offset1:0 ; encoding: [0x00,0x00,0xe0,0xd9,0x02,0x00,0x00,0x08]
> diff --git a/test/MC/R600/mubuf.s b/test/MC/R600/mubuf.s
> new file mode 100644
> index 0000000..78d365a
> --- /dev/null
> +++ b/test/MC/R600/mubuf.s
> @@ -0,0 +1,352 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
Why does this need to run with and without -mcpu=SI if that is the default?
> +
> +//===----------------------------------------------------------------------===//
> +// Test for different operand combinations
> +//===----------------------------------------------------------------------===//
> +
> +//===----------------------------------------------------------------------===//
> +// load - immediate offset only
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, s[4:7], s1
> +// CHECK: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 glc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 slc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 tfe
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 tfe glc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x30,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - vgpr offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen  offset:4 glc ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 tfe
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen tfe glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - vgpr index
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 tfe
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen tfe glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - vgpr index and offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen tfe glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - addr64
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 tfe glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - immediate offset only
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, s[4:7], s1
> +// CHECK: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 glc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 slc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 tfe
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 tfe glc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - vgpr offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen  offset:4 glc ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 tfe
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen tfe glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - vgpr index
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 tfe
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen tfe glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - vgpr index and offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen tfe glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - addr64
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 tfe glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_format_x v1, s[4:7], s1
> +// CHECK: buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_format_xy v[1:2], s[4:7], s1
> +// CHECK: buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_format_xyz v[1:3], s[4:7], s1
> +// CHECK: buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_format_xyzw v[1:4], s[4:7], s1
> +// CHECK: buffer_load_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_x v1, s[4:7], s1
> +// CHECK: buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_xy v[1:2], s[4:7], s1
> +// CHECK: buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_xyz v[1:3], s[4:7], s1
> +// CHECK: buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_xyzw v[1:4], s[4:7], s1
> +// CHECK: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_ubyte v1, s[4:7], s1
> +// CHECK: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_sbyte v1, s[4:7], s1
> +// CHECK: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_ushort v1, s[4:7], s1
> +// CHECK: buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_sshort v1, s[4:7], s1
> +// CHECK: buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1
> +// CHECK: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dwordx2 v[1:2], s[4:7], s1
> +// CHECK: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dwordx4 v[1:4], s[4:7], s1
> +// CHECK: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_byte v1, s[4:7], s1
> +// CHECK: buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_short v1, s[4:7], s1
> +// CHECK: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1 s[4:7], s1
> +// CHECK: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dwordx2 v[1:2], s[4:7], s1
> +// CHECK: buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dwordx4 v[1:4], s[4:7], s1
> +// CHECK: buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
> +
> +// TODO: Atomics
> diff --git a/test/MC/R600/smrd.s b/test/MC/R600/smrd.s
> new file mode 100644
> index 0000000..b67abf7
> --- /dev/null
> +++ b/test/MC/R600/smrd.s
> @@ -0,0 +1,32 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +s_load_dword s1, s[2:3], 1
> +// CHECK: s_load_dword s1, s[2:3], 0x1 ; encoding: [0x01,0x83,0x00,0xc0]
> +
> +s_load_dword s1, s[2:3], s4
> +// CHECK: s_load_dword s1, s[2:3], s4 ; encoding: [0x04,0x82,0x00,0xc0]
> +
> +s_load_dwordx2 s[2:3], s[2:3], 1
> +// CHECK: s_load_dwordx2 s[2:3], s[2:3], 0x1 ; encoding: [0x01,0x03,0x41,0xc0]
> +
> +s_load_dwordx2 s[2:3], s[2:3], s4
> +// CHECK: s_load_dwordx2 s[2:3], s[2:3], s4 ; encoding: [0x04,0x02,0x41,0xc0]
> +
> +s_load_dwordx4 s[4:7], s[2:3], 1
> +// CHECK: s_load_dwordx4 s[4:7], s[2:3], 0x1 ; encoding: [0x01,0x03,0x82,0xc0]
> +
> +s_load_dwordx4 s[4:7], s[2:3], s4
> +// CHECK: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x04,0x02,0x82,0xc0]
> +
> +s_load_dwordx8 s[8:15], s[2:3], 1
> +// CHECK: s_load_dwordx8 s[8:15], s[2:3], 0x1 ; encoding: [0x01,0x03,0xc4,0xc0]
> +
> +s_load_dwordx8 s[8:15], s[2:3], s4
> +// CHECK: s_load_dwordx8 s[8:15], s[2:3], s4 ; encoding: [0x04,0x02,0xc4,0xc0]
> +
> +s_load_dwordx16 s[16:31], s[2:3], 1
> +// CHECK: s_load_dwordx16 s[16:31], s[2:3], 0x1 ; encoding: [0x01,0x03,0x08,0xc1]
> +
> +s_load_dwordx16 s[16:31], s[2:3], s4
> +// CHECK: s_load_dwordx16 s[16:31], s[2:3], s4 ; encoding: [0x04,0x02,0x08,0xc1]
> diff --git a/test/MC/R600/sop1-err.s b/test/MC/R600/sop1-err.s
> new file mode 100644
> index 0000000..6206276
> --- /dev/null
> +++ b/test/MC/R600/sop1-err.s
> @@ -0,0 +1,22 @@
> +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
> +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
> +
> +s_mov_b32 v1, s2
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s1, v0
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b64 s1, s[0:1]
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b64 s[0:1], s1
> +// CHECK: error: invalid operand for instruction
> +
> +// Immediate greater than 32-bits
> +s_mov_b32 s1, 0xfffffffff
> +// CHECK: error: invalid immediate: only 32-bit values are legal
> +
> +// Immediate greater than 32-bits
> +s_mov_b64 s[0:1], 0xfffffffff
> +// CHECK: error: invalid immediate: only 32-bit values are legal
It would be good to add tests with out-of-bounds register numbers, and with
register ranges that are too long for the operand.


> diff --git a/test/MC/R600/sop1.s b/test/MC/R600/sop1.s
> new file mode 100644
> index 0000000..5844a8a
> --- /dev/null
> +++ b/test/MC/R600/sop1.s
> @@ -0,0 +1,174 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +s_mov_b32 s1, s2
> +// CHECK: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe]
> +
> +s_mov_b32 s1, 1
> +// CHECK: s_mov_b32 s1, 1 ; encoding: [0x81,0x03,0x81,0xbe]
> +
> +s_mov_b32 s1, 100
> +// CHECK: s_mov_b32 s1, 0x64 ; encoding: [0xff,0x03,0x81,0xbe,0x64,0x00,0x00,0x00]
> +
> +s_mov_b64 s[2:3], s[4:5]
> +// CHECK: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe]
> +
> +s_mov_b64 s[2:3], 0xffffffffffffffff
> +// CHECK: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe]
> +
> +s_cmov_b32 s1, 200
> +// CHECK: s_cmov_b32 s1, 0xc8 ; encoding: [0xff,0x05,0x81,0xbe,0xc8,0x00,0x00,0x00]
> +
> +// s_cmov_b64 s[2:3], 0xabcdef12
> +// FIXME-CHECK: s_cmov_b64 s[2:3], 0xabcdef12 ; encoding: [0xff,0x05,0x82,0xb3,0x12,0xef,0xcd,0xab]

Some tests with inline FP immediates (for both float and double) would be useful.

> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +s_mov_b32 s1, s2
> +// CHECK: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe]
> +
> +s_mov_b64 s[2:3], s[4:5]
> +// CHECK: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe]
> +
> +s_cmov_b32 s1, s2
> +// CHECK: s_cmov_b32 s1, s2 ; encoding: [0x02,0x05,0x81,0xbe]
> +
> +s_cmov_b64 s[2:3], s[4:5]
> +// CHECK: s_cmov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x06,0x82,0xbe]
> +
> +s_not_b32 s1, s2
> +// CHECK: s_not_b32 s1, s2 ; encoding: [0x02,0x07,0x81,0xbe]
> +
> +s_not_b64 s[2:3], s[4:5]
> +// CHECK: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x08,0x82,0xbe]
> +
> +s_wqm_b32 s1, s2
> +// CHECK: s_wqm_b32 s1, s2 ; encoding: [0x02,0x09,0x81,0xbe]
> +
> +s_wqm_b64 s[2:3], s[4:5]
> +// CHECK: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0a,0x82,0xbe]
> +
> +s_brev_b32 s1, s2
> +// CHECK: s_brev_b32 s1, s2 ; encoding: [0x02,0x0b,0x81,0xbe]
> +
> +s_brev_b64 s[2:3], s[4:5]
> +// CHECK: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0c,0x82,0xbe]
> +
> +s_bcnt0_i32_b32 s1, s2
> +// CHECK: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0d,0x81,0xbe]
> +
> +s_bcnt0_i32_b64 s1, s[2:3]
> +// CHECK: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0e,0x81,0xbe]
> +
> +s_bcnt1_i32_b32 s1, s2
> +// CHECK: s_bcnt1_i32_b32 s1, s2 ; encoding: [0x02,0x0f,0x81,0xbe]
> +
> +s_bcnt1_i32_b64 s1, s[2:3]
> +// CHECK: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x10,0x81,0xbe]
> +
> +s_ff0_i32_b32 s1, s2
> +// CHECK: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x11,0x81,0xbe]
> +
> +s_ff0_i32_b64 s1, s[2:3]
> +// CHECK: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x12,0x81,0xbe]
> +
> +s_ff1_i32_b32 s1, s2
> +// CHECK: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x13,0x81,0xbe]
> +
> +s_ff1_i32_b64 s1, s[2:3]
> +// CHECK: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x14,0x81,0xbe]
> +
> +s_flbit_i32_b32 s1, s2
> +// CHECK: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x15,0x81,0xbe]
> +
> +s_flbit_i32_b64 s1, s[2:3]
> +// CHECK: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x16,0x81,0xbe]
> +
> +s_flbit_i32 s1, s2
> +// CHECK: s_flbit_i32 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe]
> +
> +s_flbit_i32_i64 s1, s[2:3]
> +// CHECK: s_flbit_i32_i64 s1, s[2:3] ; encoding: [0x02,0x18,0x81,0xbe]
> +
> +s_sext_i32_i8 s1, s2
> +// CHECK: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x19,0x81,0xbe]
> +
> +s_sext_i32_i16 s1, s2
> +// CHECK: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe]
> +
> +s_bitset0_b32 s1, s2
> +// CHECK: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x1b,0x81,0xbe]
> +
> +s_bitset0_b64 s[2:3], s[4:5]
> +// CHECK: s_bitset0_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1c,0x82,0xbe]
> +
> +s_bitset1_b32 s1, s2
> +// CHECK: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1d,0x81,0xbe]
> +
> +s_bitset1_b64 s[2:3], s[4:5]
> +// CHECK: s_bitset1_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1e,0x82,0xbe]
> +
> +s_getpc_b64 s[2:3]
> +// CHECK: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1f,0x82,0xbe]
> +
> +s_setpc_b64 s[2:3], s[4:5]
> +// CHECK: s_setpc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x20,0x82,0xbe]
> +
> +s_swappc_b64 s[2:3], s[4:5]
> +// CHECK: s_swappc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x21,0x82,0xbe]
> +
> +s_rfe_b64 s[2:3], s[4:5]
> +// CHECK: s_rfe_b64 s[2:3], s[4:5] ; encoding: [0x04,0x22,0x82,0xbe]
> +
> +s_and_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe]
> +
> +s_or_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe]
> +
> +s_xor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe]
> +
> +s_andn2_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe]
> +
> +s_orn2_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x28,0x82,0xbe]
> +
> +s_nand_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe]
> +
> +s_nor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2a,0x82,0xbe]
> +
> +s_xnor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe]
> +
> +s_quadmask_b32 s1, s2
> +// CHECK: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe]
> +
> +s_quadmask_b64 s[2:3], s[4:5]
> +// CHECK: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe]
> +
> +s_movrels_b32 s1, s2
> +// CHECK: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2e,0x81,0xbe]
> +
> +s_movrels_b64 s[2:3], s[4:5]
> +// CHECK: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2f,0x82,0xbe]
> +
> +s_movreld_b32 s1, s2
> +// CHECK: s_movreld_b32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe]
> +
> +s_movreld_b64 s[2:3], s[4:5]
> +// CHECK: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x31,0x82,0xbe]
> +
> +s_cbranch_join s[4:5]
> +// CHECK: s_cbranch_join s[4:5] ; encoding: [0x04,0x32,0x80,0xbe]
> +
> +s_abs_i32 s1, s2
> +// CHECK: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe]
> +
> +s_mov_fed_b32 s1, s2

Tests for s_getreg_b32 / s_setreg_b32 / s_setreg_imm32_b32 would be
useful, since there's currently no other way to use them and they are
exactly the kind of unusual operation that inline assembly users are
likely to want (e.g. a math library temporarily changing the floating
point mode).

> +// CHECK: s_mov_fed_b32 s1, s2 ; encoding: [0x02,0x35,0x81,0xbe]
> diff --git a/test/MC/R600/sop2.s b/test/MC/R600/sop2.s
> new file mode 100644
> index 0000000..9a7a1c0
> --- /dev/null
> +++ b/test/MC/R600/sop2.s
> @@ -0,0 +1,131 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +// CHECK: s_add_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x80]
> +s_add_u32 s1, s2, s3
> +
> +// CHECK: s_sub_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x80]
> +s_sub_u32 s1, s2, s3
> +
> +// CHECK: s_add_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x81]
> +s_add_i32 s1, s2, s3
> +
> +// CHECK: s_sub_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x81]
> +s_sub_i32 s1, s2, s3
> +
> +// CHECK: s_addc_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x82]
> +s_addc_u32 s1, s2, s3
> +
> +// CHECK: s_subb_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x82]
> +s_subb_u32 s1, s2, s3
> +
> +// CHECK: s_min_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x83]
> +s_min_i32 s1, s2, s3
> +
> +// CHECK: s_min_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x83]
> +s_min_u32 s1, s2, s3
> +
> +// CHECK: s_max_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x84]
> +s_max_i32 s1, s2, s3
> +
> +// CHECK: s_max_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x84]
> +s_max_u32 s1, s2, s3
> +
> +// CHECK: s_cselect_b32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x85]
> +s_cselect_b32 s1, s2, s3
> +
> +// CHECK: s_cselect_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x85]
> +s_cselect_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_and_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x87]
> +s_and_b32 s2, s4, s6
> +
> +// CHECK: s_and_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x87]
> +s_and_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_or_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x88]
> +s_or_b32 s2, s4, s6
> +
> +// CHECK: s_or_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x88]
> +s_or_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_xor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x89]
> +s_xor_b32 s2, s4, s6
> +
> +// CHECK: s_xor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x89]
> +s_xor_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_andn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8a]
> +s_andn2_b32 s2, s4, s6
> +
> +// CHECK: s_andn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8a]
> +s_andn2_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_orn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8b]
> +s_orn2_b32 s2, s4, s6
> +
> +// CHECK: s_orn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8b]
> +s_orn2_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_nand_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8c]
> +s_nand_b32 s2, s4, s6
> +
> +// CHECK: s_nand_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8c]
> +s_nand_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_nor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8d]
> +s_nor_b32 s2, s4, s6
> +
> +// CHECK: s_nor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8d]
> +s_nor_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_xnor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8e]
> +s_xnor_b32 s2, s4, s6
> +
> +// CHECK: s_xnor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8e]
> +s_xnor_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_lshl_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8f]
> +s_lshl_b32 s2, s4, s6
> +
> +// CHECK: s_lshl_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8f]
> +s_lshl_b64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_lshr_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x90]
> +s_lshr_b32 s2, s4, s6
> +
> +// CHECK: s_lshr_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x90]
> +s_lshr_b64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_ashr_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x91]
> +s_ashr_i32 s2, s4, s6
> +
> +// CHECK: s_ashr_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x91]
> +s_ashr_i64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_bfm_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x92]
> +s_bfm_b32 s2, s4, s6
> +
> +// CHECK: s_bfm_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x92]
> +s_bfm_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_mul_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x93]
> +s_mul_i32 s2, s4, s6
> +
> +// CHECK: s_bfe_u32 s2, s4, s6 ; encoding: [0x04,0x06,0x82,0x93]
> +s_bfe_u32 s2, s4, s6
> +
> +// CHECK: s_bfe_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x94]
> +s_bfe_i32 s2, s4, s6
> +
> +// CHECK: s_bfe_u64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x94]
> +s_bfe_u64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_bfe_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x02,0x95]
> +s_bfe_i64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_cbranch_g_fork s[4:5], s[6:7] ; encoding: [0x04,0x06,0x80,0x95]
> +s_cbranch_g_fork s[4:5], s[6:7]
> +
> +// CHECK: s_absdiff_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x96]
> +s_absdiff_i32 s2, s4, s6
> diff --git a/test/MC/R600/sopc.s b/test/MC/R600/sopc.s
> new file mode 100644
> index 0000000..0899c1a
> --- /dev/null
> +++ b/test/MC/R600/sopc.s
> @@ -0,0 +1,9 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +s_cmp_eq_i32 s1, s2
> +// CHECK: s_cmp_eq_i32 s1, s2 ; encoding: [0x01,0x02,0x00,0xbf]
> diff --git a/test/MC/R600/sopp.s b/test/MC/R600/sopp.s
> index 0f186b1..e77db4d 100644
> --- a/test/MC/R600/sopp.s
> +++ b/test/MC/R600/sopp.s
> @@ -1,4 +1,5 @@
> -// RUN: llvm-mc -arch=amdgcn -mcpu=SI  -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
>   
>     s_nop 1            // CHECK: s_nop 1 ; encoding: [0x01,0x00,0x80,0xbf]
A test with s_nop at the immediate limit, and another with 0, would be useful.

>     s_endpgm           // CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
> diff --git a/test/MC/R600/vop1.s b/test/MC/R600/vop1.s
> new file mode 100644
> index 0000000..9c9a6b2
> --- /dev/null
> +++ b/test/MC/R600/vop1.s
> @@ -0,0 +1,182 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +// CHECK: v_nop ; encoding: [0x00,0x00,0x00,0x7e]
> +v_nop
> +
> +// CHECK: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e]
> +v_mov_b32 v1, v2
> +
> +// CHECK: v_readfirstlane_b32 s1, v2 ; encoding: [0x02,0x05,0x02,0x7e]
> +v_readfirstlane_b32 s1, v2
> +
> +// CHECK: v_cvt_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x07,0x02,0x7e]
> +v_cvt_i32_f64 v1, v[2:3]
> +
> +// CHECK: v_cvt_f64_i32_e32 v[1:2], v2 ; encoding: [0x02,0x09,0x02,0x7e]
> +v_cvt_f64_i32 v[1:2], v2
> +
> +// CHECK: v_cvt_f32_i32_e32 v1, v2 ; encoding: [0x02,0x0b,0x02,0x7e]
> +v_cvt_f32_i32 v1, v2
> +
> +// CHECK: v_cvt_f32_u32_e32 v1, v2 ; encoding: [0x02,0x0d,0x02,0x7e]
> +v_cvt_f32_u32 v1, v2
> +
> +// CHECK: v_cvt_u32_f32_e32 v1, v2 ; encoding: [0x02,0x0f,0x02,0x7e]
> +v_cvt_u32_f32 v1, v2
> +
> +// CHECK: v_cvt_i32_f32_e32 v1, v2 ; encoding: [0x02,0x11,0x02,0x7e]
> +v_cvt_i32_f32 v1, v2
> +
> +// CHECK: v_mov_fed_b32_e32 v1, v2 ; encoding: [0x02,0x13,0x02,0x7e]
> +v_mov_fed_b32 v1, v2
> +
> +// CHECK: v_cvt_f16_f32_e32 v1, v2 ; encoding: [0x02,0x15,0x02,0x7e]
> +v_cvt_f16_f32 v1, v2
> +
> +// CHECK: v_cvt_f32_f16_e32 v1, v2 ; encoding: [0x02,0x17,0x02,0x7e]
> +v_cvt_f32_f16 v1, v2
> +
> +// CHECK: v_cvt_rpi_i32_f32_e32 v1, v2 ; encoding: [0x02,0x19,0x02,0x7e]
> +v_cvt_rpi_i32_f32 v1, v2
> +
> +// CHECK: v_cvt_flr_i32_f32_e32 v1, v2 ; encoding: [0x02,0x1b,0x02,0x7e]
> +v_cvt_flr_i32_f32 v1, v2
> +
> +// CHECK: v_cvt_off_f32_i4_e32 v1, v2 ; encoding: [0x02,0x1d,0x02,0x7e]
> +v_cvt_off_f32_i4_e32 v1, v2
> +
> +// CHECK: v_cvt_f32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x1f,0x02,0x7e]
> +v_cvt_f32_f64 v1, v[2:3]
> +
> +// CHECK: v_cvt_f64_f32_e32 v[1:2], v2 ; encoding: [0x02,0x21,0x02,0x7e]
> +v_cvt_f64_f32 v[1:2], v2
> +
> +// CHECK: v_cvt_f32_ubyte0_e32 v1, v2 ; encoding: [0x02,0x23,0x02,0x7e]
> +v_cvt_f32_ubyte0 v1, v2
> +
> +// CHECK: v_cvt_f32_ubyte1_e32 v1, v2 ; encoding: [0x02,0x25,0x02,0x7e]
> +v_cvt_f32_ubyte1_e32 v1, v2
> +
> +// CHECK: v_cvt_f32_ubyte2_e32 v1, v2 ; encoding: [0x02,0x27,0x02,0x7e]
> +v_cvt_f32_ubyte2 v1, v2
> +
> +// CHECK: v_cvt_f32_ubyte3_e32 v1, v2 ; encoding: [0x02,0x29,0x02,0x7e]
> +v_cvt_f32_ubyte3 v1, v2
> +
> +// CHECK: v_cvt_u32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x2b,0x02,0x7e]
> +v_cvt_u32_f64 v1, v[2:3]
> +
> +// CHECK: v_cvt_f64_u32_e32 v[1:2], v2 ; encoding: [0x02,0x2d,0x02,0x7e]
> +v_cvt_f64_u32 v[1:2], v2
> +
> +// CHECK: v_fract_f32_e32 v1, v2 ; encoding: [0x02,0x41,0x02,0x7e]
> +v_fract_f32 v1, v2
> +
> +// CHECK: v_trunc_f32_e32 v1, v2 ; encoding: [0x02,0x43,0x02,0x7e]
> +v_trunc_f32 v1, v2
> +
> +// CHECK: v_ceil_f32_e32 v1, v2 ; encoding: [0x02,0x45,0x02,0x7e]
> +v_ceil_f32 v1, v2
> +
> +// CHECK: v_rndne_f32_e32 v1, v2 ; encoding: [0x02,0x47,0x02,0x7e]
> +v_rndne_f32 v1, v2
> +
> +// CHECK: v_floor_f32_e32 v1, v2 ; encoding: [0x02,0x49,0x02,0x7e]
> +v_floor_f32_e32 v1, v2
> +
> +// CHECK: v_exp_f32_e32 v1, v2 ; encoding: [0x02,0x4b,0x02,0x7e]
> +v_exp_f32 v1, v2
> +
> +// CHECK: v_log_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x4d,0x02,0x7e]
> +v_log_clamp_f32 v1, v2
> +
> +// CHECK: v_log_f32_e32 v1, v2 ; encoding: [0x02,0x4f,0x02,0x7e]
> +v_log_f32 v1, v2
> +
> +// CHECK: v_rcp_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x51,0x02,0x7e]
> +v_rcp_clamp_f32 v1, v2
> +
> +// CHECK: v_rcp_legacy_f32_e32 v1, v2 ; encoding: [0x02,0x53,0x02,0x7e]
> +v_rcp_legacy_f32 v1, v2
> +
> +// CHECK: v_rcp_f32_e32 v1, v2 ; encoding: [0x02,0x55,0x02,0x7e]
> +v_rcp_f32 v1, v2
> +
> +// CHECK: v_rcp_iflag_f32_e32 v1, v2 ; encoding: [0x02,0x57,0x02,0x7e]
> +v_rcp_iflag_f32 v1, v2
> +
> +// CHECK: v_rsq_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x59,0x02,0x7e]
> +v_rsq_clamp_f32 v1, v2
> +
> +// CHECK: v_rsq_legacy_f32_e32 v1, v2 ; encoding: [0x02,0x5b,0x02,0x7e]
> +v_rsq_legacy_f32 v1, v2
> +
> +// CHECK: v_rsq_f32_e32 v1, v2 ; encoding: [0x02,0x5d,0x02,0x7e]
> +v_rsq_f32_e32 v1, v2
> +
> +// CHECK: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x5f,0x02,0x7e]
> +v_rcp_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_rcp_clamp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x61,0x02,0x7e]
> +v_rcp_clamp_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_rsq_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x63,0x02,0x7e]
> +v_rsq_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_rsq_clamp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x65,0x02,0x7e]
> +v_rsq_clamp_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_sqrt_f32_e32 v1, v2 ; encoding: [0x02,0x67,0x02,0x7e]
> +v_sqrt_f32 v1, v2
> +
> +// CHECK: v_sqrt_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x69,0x02,0x7e]
> +v_sqrt_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_sin_f32_e32 v1, v2 ; encoding: [0x02,0x6b,0x02,0x7e]
> +v_sin_f32 v1, v2
> +
> +// CHECK: v_cos_f32_e32 v1, v2 ; encoding: [0x02,0x6d,0x02,0x7e]
> +v_cos_f32 v1, v2
> +
> +// CHECK: v_not_b32_e32 v1, v2 ; encoding: [0x02,0x6f,0x02,0x7e]
> +v_not_b32 v1, v2
> +
> +// CHECK: v_bfrev_b32_e32 v1, v2 ; encoding: [0x02,0x71,0x02,0x7e]
> +v_bfrev_b32 v1, v2
> +
> +// CHECK: v_ffbh_u32_e32 v1, v2 ; encoding: [0x02,0x73,0x02,0x7e]
> +v_ffbh_u32 v1, v2
> +
> +// CHECK: v_ffbl_b32_e32 v1, v2 ; encoding: [0x02,0x75,0x02,0x7e]
> +v_ffbl_b32 v1, v2
> +
> +// CHECK: v_ffbh_i32_e32 v1, v2 ; encoding: [0x02,0x77,0x02,0x7e]
> +v_ffbh_i32_e32 v1, v2
> +
> +// CHECK: v_frexp_exp_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x79,0x02,0x7e]
> +v_frexp_exp_i32_f64 v1, v[2:3]
> +
> +// CHECK: v_frexp_mant_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7b,0x02,0x7e]
> +v_frexp_mant_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_fract_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7d,0x02,0x7e]
> +v_fract_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_frexp_exp_i32_f32_e32 v1, v2 ; encoding: [0x02,0x7f,0x02,0x7e]
> +v_frexp_exp_i32_f32 v1, v2
> +
> +// CHECK: v_frexp_mant_f32_e32 v1, v2 ; encoding: [0x02,0x81,0x02,0x7e]
> +v_frexp_mant_f32 v1, v2
> +
> +// CHECK: v_clrexcp ; encoding: [0x00,0x82,0x00,0x7e]
> +v_clrexcp
> +
> +// CHECK: v_movreld_b32_e32 v1, v2 ; encoding: [0x02,0x85,0x02,0x7e]
> +v_movreld_b32 v1, v2
> +
> +// CHECK: v_movrels_b32_e32 v1, v2 ; encoding: [0x02,0x87,0x02,0x7e]
> +v_movrels_b32 v1, v2
> +
> +// CHECK: v_movrelsd_b32_e32 v1, v2 ; encoding: [0x02,0x89,0x02,0x7e]
> +v_movrelsd_b32 v1, v2
> diff --git a/test/MC/R600/vop2-err.s b/test/MC/R600/vop2-err.s
> new file mode 100644
> index 0000000..a113100
> --- /dev/null
> +++ b/test/MC/R600/vop2-err.s
> @@ -0,0 +1,35 @@
> +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
> +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Generic checks
> +//===----------------------------------------------------------------------===//
> +
> +v_mul_i32_i24 v1, v2, 100
> +// CHECK: error: invalid operand for instruction
> +
> +//===----------------------------------------------------------------------===//
> +// _e32 checks
> +//===----------------------------------------------------------------------===//
> +
> +// Immediate src1
> +v_mul_i32_i24_e32 v1, v2, 100
> +// CHECK: error: invalid operand for instruction
> +
> +// sgpr src1
> +v_mul_i32_i24_e32 v1, v2, s3
> +// CHECK: error: invalid operand for instruction
> +
> +//===----------------------------------------------------------------------===//
> +// _e64 checks
> +//===----------------------------------------------------------------------===//
> +
> +// Immediate src0
> +v_mul_i32_i24_e64 v1, 100, v3
> +// CHECK: error: invalid operand for instruction
> +
> +// Immediate src1
> +v_mul_i32_i24_e64 v1, v2, 100
> +// CHECK: error: invalid operand for instruction
> +
> +// TODO: Constant bus restrictions
> diff --git a/test/MC/R600/vop2.s b/test/MC/R600/vop2.s
> new file mode 100644
> index 0000000..025dba9
> --- /dev/null
> +++ b/test/MC/R600/vop2.s
> @@ -0,0 +1,242 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Generic Checks for floating-point instructions (These have modifiers).
> +//===----------------------------------------------------------------------===//
> +
> +// TODO: 64-bit encoding of instructions with modifiers
> +
> +// _e32 suffix
> +// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
> +v_add_f32_e32 v1, v2, v3
> +
> +// src0 inline immediate
> +// CHECK: v_add_f32_e32 v1, 1.0, v3 ; encoding: [0xf2,0x06,0x02,0x06]
> +v_add_f32 v1, 1.0, v3
> +
> +// src0 negative inline immediate
> +// CHECK: v_add_f32_e32 v1, -1.0, v3 ; encoding: [0xf3,0x06,0x02,0x06]
> +v_add_f32 v1, -1.0, v3
> +
> +// src0 literal
> +// CHECK: v_add_f32_e32 v1, 0x42c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0x42]
> +v_add_f32 v1, 100.0, v3
> +
> +// src1 negative literal
> +// CHECK: v_add_f32_e32 v1, 0xc2c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0xc2]
> +v_add_f32 v1, -100.0, v3
The comment doesn't match the test: this is a negative literal in src0.
There should also be tests with the literal in the other operand.
> +
> +//===----------------------------------------------------------------------===//
> +// Generic Checks for integer instructions (These don't have modifiers).
> +//===----------------------------------------------------------------------===//
> +
> +// _e32 suffix
> +// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
> +v_mul_i32_i24_e32 v1, v2, v3
> +
> +// _e64 suffix
> +// CHECK: v_mul_i32_i24_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x02,0x00]
> +v_mul_i32_i24_e64 v1, v2, v3
> +
> +// src0 inline
> +// CHECK: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12]
> +v_mul_i32_i24 v1, 3, v3
> +
> +// src0 negative inline
> +// CHECK: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12]
> +v_mul_i32_i24 v1, -3, v3
> +
> +// src1 inline
> +// CHECK: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00]
> +v_mul_i32_i24 v1, v2, 3
> +
> +// src1 negative inline
> +// CHECK: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00]
> +v_mul_i32_i24 v1, v2, -3
> +
> +// src0 literal
> +// CHECK: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x12,0x64,0x00,0x00,0x00]
> +v_mul_i32_i24 v1, 100, v3
> +
> +// src1 negative literal
> +// CHECK: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x12,0x9c,0xff,0xff,0xff]
> +v_mul_i32_i24 v1, -100, v3
> +
> +//===----------------------------------------------------------------------===//
> +// Checks for legal operands
> +//===----------------------------------------------------------------------===//
> +
> +// src0 sgpr
> +// CHECK: v_mul_i32_i24_e32 v1, s2, v3 ; encoding: [0x02,0x06,0x02,0x12]
> +v_mul_i32_i24 v1, s2, v3
> +
> +// src1 sgpr
> +// CHECK: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x00,0x00]
> +v_mul_i32_i24 v1, v2, s3
> +
> +// src0, src1 same sgpr
> +// CHECK: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x04,0x00,0x00]
> +v_mul_i32_i24 v1, s2, s2
> +
> +// src0 sgpr, src1 inline
> +// CHECK: v_mul_i32_i24_e64 v1, s2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x06,0x01,0x00]
> +v_mul_i32_i24 v1, s2, 3
> +
> +// src0 inline src1 sgpr
> +// CHECK: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00]
> +v_mul_i32_i24 v1, 3, s3
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +// CHECK: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00]
> +v_cndmask_b32 v1, v2, v3
> +
> +// CHECK: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02]
> +v_readlane_b32 s1, v2, s3
> +
> +// CHECK: v_writelane_b32 v1, s2, s3 ; encoding: [0x02,0x06,0x02,0x04]
> +v_writelane_b32 v1, s2, s3
> +
> +// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
> +v_add_f32 v1, v2, v3
> +
> +// CHECK: v_sub_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x08]
> +v_sub_f32 v1, v2, v3
> +
> +// CHECK: v_subrev_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0a]
> +v_subrev_f32 v1, v2, v3
> +
> +// CHECK: v_mac_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c]
> +v_mac_legacy_f32 v1, v2, v3
> +
> +// CHECK: v_mul_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e]
> +v_mul_legacy_f32_e32 v1, v2, v3
> +
> +// CHECK: v_mul_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10]
> +v_mul_f32 v1, v2, v3
> +
> +// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
> +v_mul_i32_i24 v1, v2, v3
> +
> +// CHECK: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14]
> +v_mul_hi_i32_i24 v1, v2, v3
> +
> +// CHECK: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16]
> +v_mul_u32_u24 v1, v2, v3
> +
> +// CHECK: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18]
> +v_mul_hi_u32_u24 v1, v2, v3
> +
> +// CHECK: v_min_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a]
> +v_min_legacy_f32_e32 v1, v2, v3
> +
> +// CHECK: v_max_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c]
> +v_max_legacy_f32 v1, v2, v3
> +
> +// CHECK: v_min_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e]
> +v_min_f32_e32 v1, v2, v3
> +
> +// CHECK: v_max_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20]
> +v_max_f32 v1, v2, v3
> +
> +// CHECK: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22]
> +v_min_i32 v1, v2, v3
> +
> +// CHECK: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24]
> +v_max_i32 v1, v2, v3
> +
> +// CHECK: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26]
> +v_min_u32 v1, v2, v3
> +
> +// CHECK: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28]
> +v_max_u32 v1, v2, v3
> +
> +// CHECK: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a]
> +v_lshr_b32 v1, v2, v3
> +
> +// CHECK: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c]
> +v_lshrrev_b32 v1, v2, v3
> +
> +// CHECK: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e]
> +v_ashr_i32 v1, v2, v3
> +
> +// CHECK: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30]
> +v_ashrrev_i32 v1, v2, v3
> +
> +// CHECK: v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
> +v_lshl_b32_e32 v1, v2, v3
> +
> +// CHECK: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
> +v_lshlrev_b32 v1, v2, v3
> +
> +// CHECK: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
> +v_and_b32 v1, v2, v3
> +
> +// CHECK: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
> +v_or_b32 v1, v2, v3
> +
> +// CHECK: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
> +v_xor_b32 v1, v2, v3
> +
> +// CHECK: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
> +v_bfm_b32 v1, v2, v3
> +
> +// CHECK: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e]
> +v_mac_f32 v1, v2, v3
> +
> +// CHECK: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42]
> +v_madmk_f32 v1, v2, v3, 64.0
> +
> +// CHECK: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42]
> +v_madak_f32 v1, v2, v3, 64.0
> +
> +// CHECK: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44]
> +v_bcnt_u32_b32 v1, v2, v3
> +
> +// CHECK: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46]
> +v_mbcnt_lo_u32_b32 v1, v2, v3
> +
> +// CHECK: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48]
> +v_mbcnt_hi_u32_b32_e32 v1, v2, v3
> +
> +// CHECK: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
> +v_add_i32 v1, v2, v3
> +
> +// CHECK: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
> +v_sub_i32_e32 v1, v2, v3
> +
> +// CHECK: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
> +v_subrev_i32 v1, v2, v3
> +
> +// CHECK: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
> +v_addc_u32 v1, v2, v3
> +
> +// CHECK: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
> +v_subb_u32 v1, v2, v3
> +
> +// CHECK: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
> +v_subbrev_u32 v1, v2, v3
> +
> +// CHECK: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
> +v_ldexp_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58]
> +v_cvt_pkaccum_u8_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a]
> +v_cvt_pknorm_i16_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c]
> +v_cvt_pknorm_u16_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e]
> +v_cvt_pkrtz_f16_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60]
> +v_cvt_pk_u16_u32_e32 v1, v2, v3
> +
> +// CHECK: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62]
> +v_cvt_pk_i16_i32 v1, v2, v3
> diff --git a/test/MC/R600/vop3.s b/test/MC/R600/vop3.s
> new file mode 100644
> index 0000000..7d1ba0b
> --- /dev/null
> +++ b/test/MC/R600/vop3.s
> @@ -0,0 +1,138 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// VOPC Instructions
> +//===----------------------------------------------------------------------===//
> +
> +//
> +// Modifier tests:
> +//
> +
> +v_cmp_lt_f32 s[2:3] -v4, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x20]
> +
> +v_cmp_lt_f32 s[2:3]  v4, -v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
> +
> +v_cmp_lt_f32 s[2:3] -v4, -v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x60]
> +
> +v_cmp_lt_f32 s[2:3] |v4|, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3] v4, |v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3] |v4|, |v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3] -|v4|, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x20]
> +
> +v_cmp_lt_f32 s[2:3] v4, -|v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x40]
> +
> +v_cmp_lt_f32 s[2:3] -|v4|, -|v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x60]
> +
> +//
> +// Instruction tests:
> +//
> +
> +v_cmp_f_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x00,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_eq_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x04,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_le_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x06,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_gt_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x08,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lg_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0a,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_ge_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0c,0xd0,0x04,0x0d,0x02,0x00]
> +
> +// TODO: Finish VOPC
> +
> +//===----------------------------------------------------------------------===//
> +// VOP1 Instructions
> +//===----------------------------------------------------------------------===//
> +
> +//
> +// Modifier tests:
> +//
> +
> +v_fract_f32 v1, -v2
> +// CHECK: v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x20]
> +
> +v_fract_f32 v1, |v2|
> +// CHECK: v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x00]
> +
> +v_fract_f32 v1, -|v2|
> +// CHECK: v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x20]
> +
> +v_fract_f32 v1, v2 clamp
> +// CHECK: v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x00]
> +
> +v_fract_f32 v1, v2 mul:2
> +// CHECK: v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x08]
> +
> +v_fract_f32 v1, v2, div:2 clamp
> +// CHECK: v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x18]
> +
> +// TODO: Finish VOP1
> +
> +//===----------------------------------------------------------------------===//
> +// VOP2 Instructions
> +//===----------------------------------------------------------------------===//
> +
> +// TODO: Modifier tests
> +
> +v_cndmask_b32 v1, v3, v5, s[4:5]
> +// CHECK: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
> +
> +// TODO: readlane, writelane
> +
> +v_add_f32 v1, v3, s5
> +// CHECK: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_sub_f32 v1, v3, s5
> +// CHECK: v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x08,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_subrev_f32 v1, v3, s5
> +// CHECK: v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0a,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mac_legacy_f32 v1, v3, s5
> +// CHECK: v_mac_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mul_legacy_f32 v1, v3, s5
> +// CHECK: v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0e,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mul_f32 v1, v3, s5
> +// CHECK: v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x10,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mul_i32_i24 v1, v3, s5
> +// CHECK: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00]
> +
> +//===----------------------------------------------------------------------===//
> +// VOP3 Instructions
> +//===----------------------------------------------------------------------===//
> +
> +// TODO: Modifier tests
> +
> +v_mad_legacy_f32 v2, v4, v6, v8
> +// CHECK: v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0x80,0xd2,0x04,0x0d,0x22,0x04]
> +
> +
> +
> +
> +
> diff --git a/test/MC/R600/vopc.s b/test/MC/R600/vopc.s
> new file mode 100644
> index 0000000..f44919a
> --- /dev/null
> +++ b/test/MC/R600/vopc.s
> @@ -0,0 +1,40 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Generic Checks
> +//===----------------------------------------------------------------------===//
> +
> +// src0 sgpr
> +v_cmp_lt_f32 vcc, s2, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, s2, v4 ; encoding: [0x02,0x08,0x02,0x7c]
> +
> +// src0 inline immediate
> +v_cmp_lt_f32 vcc, 0, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, 0, v4 ; encoding: [0x80,0x08,0x02,0x7c]
> +
> +// src0 literal
> +v_cmp_lt_f32 vcc, 10.0, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, 0x41200000, v4 ; encoding: [0xff,0x08,0x02,0x7c,0x00,0x00,0x20,0x41]
> +
> +// src0, src1 max vgpr
> +v_cmp_lt_f32 vcc, v255, v255
> +// CHECK: v_cmp_lt_f32_e32 vcc, v255, v255 ; encoding: [0xff,0xff,0x03,0x7c]
> +
> +// force 32-bit encoding
> +v_cmp_lt_f32_e32 vcc, v2, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
> +
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +v_cmp_f_f32 vcc, v2, v4
> +// CHECK: v_cmp_f_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x00,0x7c]
> +
> +v_cmp_lt_f32 vcc, v2, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
> +
> +// TODO: Add tests for the rest of the instructions.
> +
> -- 1.8.1.5
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150313/1e5f4a77/attachment.html>


More information about the llvm-commits mailing list