PATCH: R600/SI: Experimental assembler / inline assembly support

Tom Stellard tom at stellard.net
Tue Apr 7 10:52:45 PDT 2015


Ping.

On Fri, Mar 20, 2015 at 05:41:58PM -0400, Tom Stellard wrote:
> Hi Matt,
> 
> Here are updated patches that address the rest of your comments,
> except for the one about the float cast, because I wasn't sure how to
> fix it.
> 
> -Tom
> 
> On Fri, Mar 20, 2015 at 11:43:17AM -0400, Tom Stellard wrote:
> > Hi Matt,
> > 
> > I'm working on an update patch.  Here are some responses to your
> > comments:
> > 
> > On Fri, Mar 13, 2015 at 10:47:38AM -0700, Matt Arsenault wrote:
> > > > +    case Match_InvalidOperand: {
> > > > +      SMLoc ErrorLoc = IDLoc;
> > > > +      if (ErrorInfo != ~0ULL) {
> > > > +        if (ErrorInfo >= Operands.size()) {
> > > > +          return Error(IDLoc, "too few operands for instruction");
> > > > +        }
> > > >   
> > > > +        ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
> > > Casting to a reference always looks weird, and there are a lot of these 
> > > in this patch. Why do you need to do this? Can you not cast the pointer 
> > > type before the deref and use -> for some weird reason?
> > 
> > Operands is a vector of std::unique_ptr, so you can't cast it as a
> > regular pointer type.  Every other assembler casts as reference
> > like this.
> > 
> > > > @@ -195,17 +564,104 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
> > > >   
> > > >     // If we successfully parsed the operand or if there as an error parsing,
> > > >     // we are done.
> > > > -  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
> > > > +  //
> > > > +  // If we are parsing after we reach EndOfStatement then this means we
> > > > +  // are appending default values to the Operands list.  This is only done
> > > > +  // by custom parser, so we shouldn't continue on to the generic parsing.
> > > > +  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
> > > > +      getLexer().is(AsmToken::EndOfStatement))
> > > >       return ResTy;
> > > >   
> > > > +  bool Negate = false, Abs = false;
> > > > +  if (getLexer().getKind()== AsmToken::Minus) {
> > > > +    Parser.Lex();
> > > > +    Negate = true;
> > > > +  }
> > > > +
> > > > +  if (getLexer().getKind() == AsmToken::Pipe) {
> > > > +    Parser.Lex();
> > > > +    Abs = true;
> > > > +  }
> > > > +
> > > >     switch(getLexer().getKind()) {
> > > >       case AsmToken::Integer: {
> > > > +      SMLoc S = Parser.getTok().getLoc();
> > > > +      int64_t IntVal;
> > > > +      if (getParser().parseAbsoluteExpression(IntVal))
> > > > +        return MatchOperand_ParseFail;
> > > > +      APInt IntVal32(32, IntVal);
> > > > +      if (IntVal32.getSExtValue() != IntVal) {
> > > > +        Error(S, "invalid immediate: only 32-bit values are legal");
> > > > +        return MatchOperand_ParseFail;
> > > > +      }
> > > > +
> > > > +      IntVal = IntVal32.getSExtValue();
> > > > +      if (Negate)
> > > > +        IntVal *= -1;
> > > > +      Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
> > > > +      return MatchOperand_Success;
> > > > +    }
> > > > +    case AsmToken::Real: {
> > > > +      // FIXME: We should emit an error if a double precisions floating-point
> > > > +      // value is used.  I'm not sure the best way to detect this.
> > > > +      SMLoc S = Parser.getTok().getLoc();
> > > >         int64_t IntVal;
> > > >         if (getParser().parseAbsoluteExpression(IntVal))
> > > >           return MatchOperand_ParseFail;
> > > > -      Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
> > > > +
> > > > +      APFloat F((float)APInt(64, IntVal).bitsToDouble());
> > > You should be able to avoid using the host float cast here
> > 
> > What should I do instead?
> > 
> > > > +      if (Negate)
> > > > +        F.changeSign();
> > > > +      Operands.push_back(
> > > > +          AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
> > > >         return MatchOperand_Success;
> > > >       }
> > > > +    case AsmToken::Identifier: {
> > > > +      SMLoc S, E;
> > > > +      unsigned RegNo;
> > > > +      if (!ParseRegister(RegNo, S, E)) {
> > > > +
> > > > +        bool HasModifiers = operandsHaveModifiers(Operands);
> > > > +        unsigned Modifiers = 0;
> > > > +
> > > > +        if (Negate)
> > > > +          Modifiers |= 0x1;
> > > > +
> > > > +        if (Abs) {
> > > > +          if (getLexer().getKind() != AsmToken::Pipe)
> > > > +            return MatchOperand_ParseFail;
> > > > +          Parser.Lex();
> > > > +          Modifiers |= 0x2;
> > > > +        }
> > > > +
> > > > +        if (Modifiers && !HasModifiers) {
> > > > +          // We are adding a modifier to src1 or src2 and previous sources
> > > > +          // don't have modifiers, so we need to go back and empty modifers
> > > > +          // for each previous source.
> > > > +          for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1;
> > > > +               --PrevRegIdx) {
> > > > +
> > > > +            AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]);
> > > > +            RegOp.setModifiers(0);
> > > > +          }
> > > > +        }
> > > > +
> > > > +
> > > > +        Operands.push_back(AMDGPUOperand::CreateReg(
> > > > +            RegNo, S, E, getContext().getRegisterInfo()));
> > > > +
> > > > +        if (HasModifiers || Modifiers) {
> > > > +          AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]);
> > > > +          RegOp.setModifiers(Modifiers);
> > > > +
> > > > +        }
> > > > +     }  else {
> > > > +      Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(),
> > > > +                                                    S));
> > > > +      Parser.Lex();
> > > > +     }
> > > > +     return MatchOperand_Success;
> > > > +    }
> > > >       default:
> > > >         return MatchOperand_NoMatch;
> > > >     }
> > > > +void AMDGPUAsmParser::cvtMubuf(MCInst &Inst,
> > > > +                               const OperandVector &Operands) {
> > > > +  unsigned i = 1;
> > > > +
> > > > +  std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> > > > +
> > > > +  for (unsigned e = Operands.size(); i != e; ++i) {
> > > This loop condition looks weird. You don't seem to be using the i after 
> > > the loop, so its definition should move into the for
> > > > +    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> > > > +
> > > > +    // Add the register arguments
> > > > +    if (Op.isReg()) {
> > > > +      Op.addRegOperands(Inst, 1);
> > > > +      continue;
> > > > +    }
> > > > +
> > > > +    // Handle the case where soffset is an immediate
> > > > +    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
> > > > +      Op.addImmOperands(Inst, 1);
> > > > +      continue;
> > > > +    }
> > > > +
> > > > +    // Handle tokens like 'offen' which are sometimes hard-coded into the
> > > > +    // asm string.  There are no MCInst operands for these.
> > > > +    if (Op.isToken()) {
> > > > +      continue;
> > > > +    }
> > > > +    assert(Op.isImm());
> > > > +
> > > > +    // Handle optional arguments
> > > > +    OptionalIdx[Op.getImmTy()] = i;
> > > > +  }
> > > > +
> > > > +  assert(OptionalIdx.size() == 4);
> > > > +
> > > > +  unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
> > > > +  unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC];
> > > > +  unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC];
> > > > +  unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE];
> > > > +
> > > > +  ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1);
> > > > +  ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1);
> > > > +  ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1);
> > > > +  ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1);
> > > Is this defaulting Offset/GLC/SLC/TFE to 1? Shouldn't these be 0?
> > 
> > No, 1 is the number of operands that should be added to the instruction.
> > > > +}
> > > > +
> > > > +//===----------------------------------------------------------------------===//
> > > > +//                         SI Inline Assembly Support
> > > > +//===----------------------------------------------------------------------===//
> > > > +
> > > > +std::pair<unsigned, const TargetRegisterClass *>
> > > > +SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
> > > > +                                               const std::string &Constraint,
> > > StringRef operand instead?
> > 
> > It's a virtual function, so I don't think I can change it.
> > 
> > > > +                                               MVT VT) const {
> > > > +  dbgs() << "Constraint = " << Constraint << "\n";
> > > > +  dbgs() << "VT = " << EVT(VT).getEVTString() << "\n";
> > > Leftover debug printing
> > > > +  if (Constraint == "r") {
> > > > +    switch(VT.SimpleTy) {
> > > > +      default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
> > > > +      case MVT::i64:
> > > > +        return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
> > > > +      case MVT::i32:
> > > > +        return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
> > > > +    }
> > > > +  }
> > > > +
> > > > +  if (Constraint.size() > 1) {
> > > > +    const TargetRegisterClass *RC = nullptr;
> > > > +    if (Constraint[1] == 'v') {
> > > > +      RC = &AMDGPU::VGPR_32RegClass;
> > > > +    } else if (Constraint[1] == 's') {
> > > > +      RC = &AMDGPU::SGPR_32RegClass;
> > > > +    }
> > > > +
> > > > +    if (RC) {
> > > > +      unsigned Idx = std::atoi(Constraint.substr(2).c_str());
> > > > +      if (Idx < RC->getNumRegs())
> > > > +        return std::make_pair(RC->getRegister(Idx), RC);
> > > > +    }
> > > > +  }
> > > > +  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
> > > > +}
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

> From fa9638c2f13d1512c1a72fdd242f92e958a570be Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Fri, 20 Mar 2015 13:54:28 -0400
> Subject: [PATCH 1/3] R600/SI: Don't print offset0/offset1 DS operands when
>  they are 0
> 
> ---
>  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp  | 12 ++++++---
>  .../ds-negative-offset-addressing-mode-loop.ll     |  2 +-
>  test/CodeGen/R600/ds_read2.ll                      | 24 ++++++++---------
>  test/CodeGen/R600/ds_read2st64.ll                  |  8 +++---
>  test/CodeGen/R600/ds_write2.ll                     | 30 +++++++++++-----------
>  test/CodeGen/R600/ds_write2st64.ll                 |  6 ++---
>  test/CodeGen/R600/unaligned-load-store.ll          |  4 +--
>  7 files changed, 45 insertions(+), 41 deletions(-)
> 
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> index d62fd3f..c7f9da6 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -89,14 +89,18 @@ void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
>  
>  void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
>                                          raw_ostream &O) {
> -  O << " offset0:";
> -  printU8ImmDecOperand(MI, OpNo, O);
> +  if (MI->getOperand(OpNo).getImm()) {
> +    O << " offset0:";
> +    printU8ImmDecOperand(MI, OpNo, O);
> +  }
>  }
>  
>  void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
>                                          raw_ostream &O) {
> -  O << " offset1:";
> -  printU8ImmDecOperand(MI, OpNo, O);
> +  if (MI->getOperand(OpNo).getImm()) {
> +    O << " offset1:";
> +    printU8ImmDecOperand(MI, OpNo, O);
> +  }
>  }
>  
>  void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
> diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
> index c381fc4..e7e13d6 100644
> --- a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
> +++ b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
> @@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.barrier.local() #1
>  ; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
>  ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
>  
> -; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
> +; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1
>  ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
>  ; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
>  ; CHECK: s_endpgm
> diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll
> index f53b6c0..0f63026 100644
> --- a/test/CodeGen/R600/ds_read2.ll
> +++ b/test/CodeGen/R600/ds_read2.ll
> @@ -7,7 +7,7 @@
>   @lds.f64 = addrspace(3) global [512 x double] undef, align 8
>  
>  ; SI-LABEL: @simple_read2_f32
> -; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
> +; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8
>  ; SI: s_waitcnt lgkmcnt(0)
>  ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
>  ; SI: buffer_store_dword [[RESULT]]
> @@ -26,7 +26,7 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 {
>  }
>  
>  ; SI-LABEL: @simple_read2_f32_max_offset
> -; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
> +; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255
>  ; SI: s_waitcnt lgkmcnt(0)
>  ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
>  ; SI: buffer_store_dword [[RESULT]]
> @@ -63,7 +63,7 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
>  }
>  
>  ; SI-LABEL: @simple_read2_f32_x2
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
>  ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
>  ; SI: s_endpgm
>  define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
> @@ -94,7 +94,7 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
>  
>  ; Make sure there is an instruction between the two sets of reads.
>  ; SI-LABEL: @simple_read2_f32_x2_barrier
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
>  ; SI: s_barrier
>  ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
>  ; SI: s_endpgm
> @@ -313,7 +313,7 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs
>  
>  ; SI-LABEL: @simple_read2_f64
>  ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
> -; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
> +; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset1:8
>  ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
>  ; SI: buffer_store_dwordx2 [[RESULT]]
>  ; SI: s_endpgm
> @@ -331,7 +331,7 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 {
>  }
>  
>  ; SI-LABEL: @simple_read2_f64_max_offset
> -; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
> +; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255
>  ; SI: s_endpgm
>  define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -366,7 +366,7 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
>  
>  ; Alignment only 4
>  ; SI-LABEL: @misaligned_read2_f64
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
>  ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
>  ; SI: s_endpgm
>  define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
> @@ -386,7 +386,7 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)
>  
>  ; SI-LABEL: @load_constant_adjacent_offsets
>  ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
>  define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
>    %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
>    %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
> @@ -397,7 +397,7 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
>  
>  ; SI-LABEL: @load_constant_disjoint_offsets
>  ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2
>  define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
>    %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
>    %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
> @@ -410,7 +410,7 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
>  
>  ; SI-LABEL: @load_misaligned64_constant_offsets
>  ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
>  ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
>  define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
>    %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
> @@ -425,8 +425,8 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
>  ; SI-LABEL: @load_misaligned64_constant_large_offsets
>  ; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
>  ; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000
> -; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
> -; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
> +; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1
> +; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1
>  ; SI: s_endpgm
>  define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
>    %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
> diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll
> index 482debb..54b3b45 100644
> --- a/test/CodeGen/R600/ds_read2st64.ll
> +++ b/test/CodeGen/R600/ds_read2st64.ll
> @@ -5,7 +5,7 @@
>  
>  
>  ; SI-LABEL: @simple_read2st64_f32_0_1
> -; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
>  ; SI: s_waitcnt lgkmcnt(0)
>  ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
>  ; SI: buffer_store_dword [[RESULT]]
> @@ -117,7 +117,7 @@ define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
>  }
>  
>  ; SI-LABEL: @simple_read2st64_f64_0_1
> -; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
>  ; SI: s_waitcnt lgkmcnt(0)
>  ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
>  ; SI: buffer_store_dwordx2 [[RESULT]]
> @@ -158,7 +158,7 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac
>  ; Alignment only
>  
>  ; SI-LABEL: @misaligned_read2st64_f64
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
>  ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
>  ; SI: s_endpgm
>  define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
> @@ -237,7 +237,7 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double
>  
>  ; SI-LABEL: @byte_size_only_divisible_64_read2_f64
>  ; SI-NOT: ds_read2st_b64
> -; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
> +; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
>  ; SI: s_endpgm
>  define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/R600/ds_write2.ll
> index d06f780..60bcbcf 100644
> --- a/test/CodeGen/R600/ds_write2.ll
> +++ b/test/CodeGen/R600/ds_write2.ll
> @@ -7,7 +7,7 @@
>  ; SI-LABEL: @simple_write2_one_val_f32
>  ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
>  ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
> +; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8
>  ; SI: s_endpgm
>  define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -25,7 +25,7 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1
>  ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
>  ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
>  ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 
> +; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 
>  ; SI: s_endpgm
>  define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -84,7 +84,7 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float
>  ; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
>  ; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
>  ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
> +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
>  ; SI: s_endpgm
>  define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -105,7 +105,7 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2
>  ; SI-LABEL: @simple_write2_two_val_subreg2_f32
>  ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
>  ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
> +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
>  ; SI: s_endpgm
>  define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -124,7 +124,7 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa
>  ; SI-LABEL: @simple_write2_two_val_subreg4_f32
>  ; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
>  ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
> +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
>  ; SI: s_endpgm
>  define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -144,7 +144,7 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa
>  ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
>  ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
>  ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
> +; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
>  ; SI: s_endpgm
>  define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -179,7 +179,7 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add
>  }
>  
>  ; SI-LABEL: @simple_write2_two_val_f32_x2
> -; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
> +; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8
>  ; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
>  ; SI: s_endpgm
>  define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
> @@ -268,7 +268,7 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add
>  ; SI-LABEL: @simple_write2_one_val_f64
>  ; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
>  ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
> -; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
> +; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8
>  ; SI: s_endpgm
>  define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -285,7 +285,7 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace
>  ; SI-LABEL: @misaligned_simple_write2_one_val_f64
>  ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
>  ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1
> +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1
>  ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
>  ; SI: s_endpgm
>  define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
> @@ -304,7 +304,7 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl
>  ; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
>  ; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
>  ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
> -; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
> +; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
>  ; SI: s_endpgm
>  define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -324,7 +324,7 @@ define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace
>  
>  ; SI-LABEL: @store_constant_adjacent_offsets
>  ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
>  define void @store_constant_adjacent_offsets() {
>    store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
>    store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
> @@ -334,7 +334,7 @@ define void @store_constant_adjacent_offsets() {
>  ; SI-LABEL: @store_constant_disjoint_offsets
>  ; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
>  ; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
> +; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2
>  define void @store_constant_disjoint_offsets() {
>    store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
>    store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
> @@ -345,7 +345,7 @@ define void @store_constant_disjoint_offsets() {
>  
>  ; SI-LABEL: @store_misaligned64_constant_offsets
>  ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
>  ; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
>  define void @store_misaligned64_constant_offsets() {
>    store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
> @@ -358,8 +358,8 @@ define void @store_misaligned64_constant_offsets() {
>  ; SI-LABEL: @store_misaligned64_constant_large_offsets
>  ; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
>  ; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
> -; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> -; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
> +; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
>  ; SI: s_endpgm
>  define void @store_misaligned64_constant_large_offsets() {
>    store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
> diff --git a/test/CodeGen/R600/ds_write2st64.ll b/test/CodeGen/R600/ds_write2st64.ll
> index 2044df2..1d9d881 100644
> --- a/test/CodeGen/R600/ds_write2st64.ll
> +++ b/test/CodeGen/R600/ds_write2st64.ll
> @@ -7,7 +7,7 @@
>  ; SI-LABEL: @simple_write2st64_one_val_f32_0_1
>  ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
>  ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1
> +; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1
>  ; SI: s_endpgm
>  define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -46,7 +46,7 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add
>  ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
>  ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
>  ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
> +; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
>  ; SI: s_endpgm
>  define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -85,7 +85,7 @@ define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, d
>  
>  ; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
>  ; SI-NOT: ds_write2st64_b64
> -; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
> +; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8
>  ; SI: s_endpgm
>  define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
> index efb1de2..82d88eb 100644
> --- a/test/CodeGen/R600/unaligned-load-store.ll
> +++ b/test/CodeGen/R600/unaligned-load-store.ll
> @@ -195,7 +195,7 @@ define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out,
>  
>  ; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
>  ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
> -; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
> +; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
>  ; SI: s_endpgm
>  define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
>    %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
> @@ -243,7 +243,7 @@ define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
>  
>  ; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
>  ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
> -; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
>  ; SI: s_endpgm
>  define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
>    %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
> -- 
> 1.8.1.5
> 

> From e8a8d61e6b2df9c251914220536e281dade8b35c Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Fri, 20 Mar 2015 16:11:57 -0400
> Subject: [PATCH 2/3] R600/SI: Add missing SOPK instructions
> 
> ---
>  lib/Target/R600/SIInstrFormats.td | 13 ++++++++++++
>  lib/Target/R600/SIInstrInfo.td    | 44 +++++++++++++++++++++++++++++++++++----
>  lib/Target/R600/SIInstructions.td | 28 +++++++++++++++++--------
>  3 files changed, 72 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 4167590..e7a07a1 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -181,6 +181,19 @@ class SOPKe <bits<5> op> : Enc32 {
>    let Inst{31-28} = 0xb; //encoding
>  }
>  
> +class SOPK64e <bits<5> op> : Enc64 {
> +  bits <7> sdst = 0;
> +  bits <16> simm16;
> +  bits <32> imm;
> +
> +  let Inst{15-0} = simm16;
> +  let Inst{22-16} = sdst;
> +  let Inst{27-23} = op;
> +  let Inst{31-28} = 0xb;
> +
> +  let Inst{63-32} = imm;
> +}
> +
>  class SOPPe <bits<7> op> : Enc32 {
>    bits <16> simm16;
>  
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 86e1082..345e699 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -546,6 +546,16 @@ class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
>    SOPKe <op.VI>,
>    SIMCInstr<opName, SISubtarget.VI>;
>  
> +multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm,
> +                   string asm = opName#opAsm> {
> +  def "" : SOPK_Pseudo <opName, outs, ins, []>;
> +
> +  def _si : SOPK_Real_si <op, opName, outs, ins, asm>;
> +
> +  def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>;
> +
> +}
> +
>  multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
>    def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0),
>      pattern>;
> @@ -561,13 +571,39 @@ multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
>    def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst),
>      (ins SReg_32:$src0, u16imm:$src1), pattern>;
>  
> -  def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
> -    (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
> +  let DisableEncoding = "$dst" in {
> +    def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
> +      (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
>  
> -  def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
> -    (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
> +    def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
> +      (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
> +  }
>  }
>  
> +multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m <
> +  op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16),
> +  " $sdst, $simm16"
> +>;
> +
> +multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins,
> +                       string argAsm, string asm = opName#argAsm> {
> +
> +  def "" : SOPK_Pseudo <opName, outs, ins, []>;
> +
> +  def _si : SOPK <outs, ins, asm, []>,
> +            SOPK64e <op.SI>,
> +            SIMCInstr<opName, SISubtarget.SI> {
> +              let AssemblerPredicates = [isSICI];
> +              let isCodeGenOnly = 0;
> +            }
> +
> +  def _vi : SOPK <outs, ins, asm, []>,
> +            SOPK64e <op.VI>,
> +            SIMCInstr<opName, SISubtarget.VI> {
> +              let AssemblerPredicates = [isVI];
> +              let isCodeGenOnly = 0;
> +            }
> +}
>  //===----------------------------------------------------------------------===//
>  // SMRD classes
>  //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 5f02a31..d6e4986 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -384,6 +384,7 @@ defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32",
>  >;
>  */
>  
> +defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>;
>  defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>;
>  defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>;
>  defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>;
> @@ -397,18 +398,27 @@ defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>;
>  defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>;
>  } // End isCompare = 1
>  
> -let isCommutable = 1 in {
> -  let Defs = [SCC], isCommutable = 1 in {
> -    defm S_ADDK_I32 : SOPK_32 <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
> -  }
> -  defm S_MULK_I32 : SOPK_32 <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
> +let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
> +    Constraints = "$sdst = $src0" in {
> +  defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
> +  defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
>  }
>  
> -//defm S_CBRANCH_I_FORK : SOPK_ <sopk<0x11, 0x10>, "s_cbranch_i_fork", []>;
> +defm S_CBRANCH_I_FORK : SOPK_m <
> +  sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs),
> +  (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16"
> +>;
>  defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>;
> -defm S_SETREG_B32 : SOPK_32 <sopk<0x13, 0x12>, "s_setreg_b32", []>;
> -defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
> -//defm S_SETREG_IMM32_B32 : SOPK_32 <sopk<0x15, 0x14>, "s_setreg_imm32_b32", []>;
> +defm S_SETREG_B32 : SOPK_m <
> +  sopk<0x13, 0x12>, "s_setreg_b32", (outs),
> +  (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16"
> +>;
> +// FIXME: Not on SI?
> +//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
> +defm S_SETREG_IMM32_B32 : SOPK_IMM32 <
> +  sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs),
> +  (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16"
> +>;
>  
>  //===----------------------------------------------------------------------===//
>  // SOPP Instructions
> -- 
> 1.8.1.5
> 

> From 86d7bb8f3b90415db396ac688e42ac89fe9ac208 Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Fri, 14 Nov 2014 06:22:05 -0500
> Subject: [PATCH 3/3] R600/SI: Initial support for assembler and inline
>  assembly
> 
> This is currently considered experimental, but most of the more
> commonly used instructions should work.
> 
> So far only SI has been extensively tested, CI and VI probably work too,
> but may be buggy.  The current set of test cases does not give complete
> coverage, but I think it is sufficient for an experimental assembler.
> 
> See the documentation in R600Usage for more information.
> ---
>  docs/R600Usage.rst                                |   60 +-
>  lib/Target/R600/AMDGPU.td                         |   24 +-
>  lib/Target/R600/AMDGPUAsmPrinter.cpp              |   22 +
>  lib/Target/R600/AMDGPUAsmPrinter.h                |    4 +
>  lib/Target/R600/AMDGPUSubtarget.cpp               |    1 +
>  lib/Target/R600/AMDGPUSubtarget.h                 |    3 +
>  lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp     | 1095 +++++++++++++++++++--
>  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp |    5 +-
>  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h   |    2 +
>  lib/Target/R600/SIISelLowering.cpp                |   35 +
>  lib/Target/R600/SIISelLowering.h                  |    4 +
>  lib/Target/R600/SIInstrFormats.td                 |   18 +-
>  lib/Target/R600/SIInstrInfo.td                    |  228 ++++-
>  lib/Target/R600/SIInstructions.td                 |   15 +-
>  lib/Target/R600/SIRegisterInfo.td                 |   47 +-
>  test/MC/R600/ds-err.s                             |   23 +
>  test/MC/R600/ds.s                                 |  337 +++++++
>  test/MC/R600/mubuf.s                              |  352 +++++++
>  test/MC/R600/smrd.s                               |   32 +
>  test/MC/R600/sop1-err.s                           |   37 +
>  test/MC/R600/sop1.s                               |  177 ++++
>  test/MC/R600/sop2.s                               |  131 +++
>  test/MC/R600/sopc.s                               |    9 +
>  test/MC/R600/sopk.s                               |   66 ++
>  test/MC/R600/sopp.s                               |   14 +-
>  test/MC/R600/vop1.s                               |  182 ++++
>  test/MC/R600/vop2-err.s                           |   35 +
>  test/MC/R600/vop2.s                               |  242 +++++
>  test/MC/R600/vop3.s                               |  138 +++
>  test/MC/R600/vopc.s                               |   40 +
>  30 files changed, 3236 insertions(+), 142 deletions(-)
>  create mode 100644 test/MC/R600/ds-err.s
>  create mode 100644 test/MC/R600/ds.s
>  create mode 100644 test/MC/R600/mubuf.s
>  create mode 100644 test/MC/R600/smrd.s
>  create mode 100644 test/MC/R600/sop1-err.s
>  create mode 100644 test/MC/R600/sop1.s
>  create mode 100644 test/MC/R600/sop2.s
>  create mode 100644 test/MC/R600/sopc.s
>  create mode 100644 test/MC/R600/sopk.s
>  create mode 100644 test/MC/R600/vop1.s
>  create mode 100644 test/MC/R600/vop2-err.s
>  create mode 100644 test/MC/R600/vop2.s
>  create mode 100644 test/MC/R600/vop3.s
>  create mode 100644 test/MC/R600/vopc.s
> 
> diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst
> index 48a30c8..093cdd7 100644
> --- a/docs/R600Usage.rst
> +++ b/docs/R600Usage.rst
> @@ -6,22 +6,51 @@ Introduction
>  ============
>  
>  The R600 back-end provides ISA code generation for AMD GPUs, starting with
> -the R600 family up until the current Sea Islands (GCN Gen 2).
> +the R600 family up until the current Volcanic Islands (GCN Gen 3).
>  
>  
>  Assembler
>  =========
>  
> -The assembler is currently a work in progress and not yet complete.  Below
> -are the currently supported features.
> +The assembler is currently considered experimental.
> +
> +For syntax examples look in test/MC/R600.
> +
> +Below are some of the currently supported features (modulo bugs).  These
> +all apply to the Southern Islands ISA; Sea Islands and Volcanic Islands
> +are also supported but may be missing some instructions and have more bugs:
> +
> +DS Instructions
> +---------------
> +All DS instructions are supported.
> +
> +MUBUF Instructions
> +------------------
> +All non-atomic MUBUF instructions are supported.
> +
> +SMRD Instructions
> +-----------------
> +Only the s_load_dword* SMRD instructions are supported.
> +
> +SOP1 Instructions
> +-----------------
> +All SOP1 instructions are supported.
> +
> +SOP2 Instructions
> +-----------------
> +All SOP2 instructions are supported.
> +
> +SOPC Instructions
> +-----------------
> +All SOPC instructions are supported.
>  
>  SOPP Instructions
>  -----------------
>  
> -Unless otherwise mentioned, all SOPP instructions that with an operand
> -accept a integer operand(s) only.  No verification is performed on the
> -operands, so it is up to the programmer to be familiar with the range
> -or acceptable values.
> +Unless otherwise mentioned, all SOPP instructions that have one or more
> +operands accept integer operands only.  No verification is performed
> +on the operands, so it is up to the programmer to be familiar with the
> +range of acceptable values.
>  
>  s_waitcnt
>  ^^^^^^^^^
> @@ -41,3 +70,20 @@ wait for.
>     // Wait for vmcnt counter to be 1.
>     s_waitcnt vmcnt(1)
>  
> +VOP1, VOP2, VOP3, VOPC Instructions
> +-----------------------------------
> +
> +All 32-bit and 64-bit encodings should work.
> +
> +The assembler will automatically detect which encoding size to use for
> +VOP1, VOP2, and VOPC instructions based on the operands.  If you want to force
> +a specific encoding size, you can add an _e32 (for 32-bit encoding) or
> +_e64 (for 64-bit encoding) suffix to the instruction.  Most, but not all
> +instructions support an explicit suffix.  These are all valid assembly
> +strings:
> +
> +.. code-block:: nasm
> +
> +   v_mul_i32_i24 v1, v2, v3
> +   v_mul_i32_i24_e32 v1, v2, v3
> +   v_mul_i32_i24_e64 v1, v2, v3
> diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
> index e5d5ce2..2eb805e 100644
> --- a/lib/Target/R600/AMDGPU.td
> +++ b/lib/Target/R600/AMDGPU.td
> @@ -133,6 +133,20 @@ class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
>          !cast<string>(Value),
>          "The size of local memory in bytes">;
>  
> +def FeatureGCN : SubtargetFeature<"gcn",
> +        "IsGCN",
> +        "true",
> +        "GCN or newer GPU">;
> +
> +def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
> +        "GCN1Encoding",
> +        "true",
> +        "Encoding format for SI and CI">;
> +
> +def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
> +        "GCN3Encoding",
> +        "true",
> +        "Encoding format for VI">;
>  class SubtargetFeatureGeneration <string Value,
>                                    list<SubtargetFeature> Implies> :
>          SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
> @@ -158,15 +172,17 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
>  
>  def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
>          [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
> -         FeatureWavefrontSize64]>;
> +         FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding]>;
>  
>  def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
>          [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
> -         FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
> +         FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
> +         FeatureGCN1Encoding]>;
>  
>  def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
>          [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
> -         FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
> +         FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
> +         FeatureGCN3Encoding]>;
>  
>  //===----------------------------------------------------------------------===//
>  
> @@ -197,8 +213,10 @@ def NullALU : InstrItinClass;
>  
>  class PredicateControl {
>    Predicate SubtargetPredicate;
> +  list<Predicate> AssemblerPredicates = [];
>    list<Predicate> OtherPredicates = [];
>    list<Predicate> Predicates = !listconcat([SubtargetPredicate],
> +                                            AssemblerPredicates,
>                                              OtherPredicates);
>  }
>  
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> index 5e1b6a3..b7a48c3 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> @@ -17,6 +17,7 @@
>  //
>  
>  #include "AMDGPUAsmPrinter.h"
> +#include "InstPrinter/AMDGPUInstPrinter.h"
>  #include "AMDGPU.h"
>  #include "AMDKernelCodeT.h"
>  #include "AMDGPUSubtarget.h"
> @@ -577,3 +578,24 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
>  
>    OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header)));
>  }
> +
> +bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
> +                                       unsigned AsmVariant,
> +                                       const char *ExtraCode, raw_ostream &O) {
> +  if (ExtraCode && ExtraCode[0]) {
> +    if (ExtraCode[1] != 0)
> +      return true; // Unknown modifier.
> +
> +    switch (ExtraCode[0]) {
> +    default:
> +      // See if this is a generic print operand
> +      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
> +    case 'r':
> +      break;
> +    }
> +  }
> +
> +  AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
> +                                     *TM.getSubtargetImpl()->getRegisterInfo());
> +  return false;
> +}
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
> index 58ffb1e..824cc43 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.h
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.h
> @@ -99,6 +99,10 @@ public:
>  
>    void EmitEndOfAsmFile(Module &M) override;
>  
> +  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
> +                       unsigned AsmVariant, const char *ExtraCode,
> +                       raw_ostream &O);
> +
>  protected:
>    std::vector<std::string> DisasmLines, HexLines;
>    size_t DisasmLineMaxLen;
> diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
> index 0ead652..259224a 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.cpp
> +++ b/lib/Target/R600/AMDGPUSubtarget.cpp
> @@ -71,6 +71,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
>        EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
>        WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
>        EnableVGPRSpilling(false), SGPRInitBug(false),
> +      IsGCN(false), GCN1Encoding(false), GCN3Encoding(false),
>        FrameLowering(TargetFrameLowering::StackGrowsUp,
>                      64 * 16, // Maximum stack alignment (long16)
>                      0),
> diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
> index 403a3e4..aeb0817 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.h
> +++ b/lib/Target/R600/AMDGPUSubtarget.h
> @@ -71,6 +71,9 @@ private:
>    int LocalMemorySize;
>    bool EnableVGPRSpilling;
>    bool SGPRInitBug;
> +  bool IsGCN;
> +  bool GCN1Encoding;
> +  bool GCN3Encoding;
>  
>    AMDGPUFrameLowering FrameLowering;
>    std::unique_ptr<AMDGPUTargetLowering> TLInfo;
> diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> index 3b4ba1a..bd202a1 100644
> --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> @@ -8,6 +8,8 @@
>  //===----------------------------------------------------------------------===//
>  
>  #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
> +#include "SIDefines.h"
> +#include "llvm/ADT/APFloat.h"
>  #include "llvm/ADT/SmallString.h"
>  #include "llvm/ADT/SmallVector.h"
>  #include "llvm/ADT/STLExtras.h"
> @@ -27,77 +29,105 @@
>  #include "llvm/Support/SourceMgr.h"
>  #include "llvm/Support/TargetRegistry.h"
>  #include "llvm/Support/raw_ostream.h"
> +#include "llvm/Support/Debug.h"
>  
>  using namespace llvm;
>  
>  namespace {
>  
> -class AMDGPUAsmParser : public MCTargetAsmParser {
> -  MCSubtargetInfo &STI;
> -  MCAsmParser &Parser;
> -
> -
> -  /// @name Auto-generated Match Functions
> -  /// {
> -
> -#define GET_ASSEMBLER_HEADER
> -#include "AMDGPUGenAsmMatcher.inc"
> -
> -  /// }
> -
> -public:
> -  AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
> -               const MCInstrInfo &_MII,
> -               const MCTargetOptions &Options)
> -      : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
> -    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
> -  }
> -  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
> -  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
> -                               OperandVector &Operands, MCStreamer &Out,
> -                               uint64_t &ErrorInfo,
> -                               bool MatchingInlineAsm) override;
> -  bool ParseDirective(AsmToken DirectiveID) override;
> -  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
> -  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
> -                        SMLoc NameLoc, OperandVector &Operands) override;
> -
> -  bool parseCnt(int64_t &IntVal);
> -  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
> -};
> +struct OptionalOperand;
>  
>  class AMDGPUOperand : public MCParsedAsmOperand {
>    enum KindTy {
>      Token,
> -    Immediate
> +    Immediate,
> +    Register,
> +    Expression
>    } Kind;
>  
> +  SMLoc StartLoc, EndLoc;
> +
>  public:
>    AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
>  
> +  MCContext *Ctx;
> +
> +  enum ImmTy {
> +    ImmTyNone,
> +    ImmTyDSOffset0,
> +    ImmTyDSOffset1,
> +    ImmTyGDS,
> +    ImmTyOffset,
> +    ImmTyGLC,
> +    ImmTySLC,
> +    ImmTyTFE,
> +    ImmTyClamp,
> +    ImmTyOMod
> +  };
> +
>    struct TokOp {
>      const char *Data;
>      unsigned Length;
>    };
>  
>    struct ImmOp {
> +    bool IsFPImm;
> +    ImmTy Type;
>      int64_t Val;
>    };
>  
> +  struct RegOp {
> +    unsigned RegNo;
> +    int Modifiers;
> +    const MCRegisterInfo *TRI;
> +  };
> +
>    union {
>      TokOp Tok;
>      ImmOp Imm;
> +    RegOp Reg;
> +    const MCExpr *Expr;
>    };
>  
>    void addImmOperands(MCInst &Inst, unsigned N) const {
>      Inst.addOperand(MCOperand::CreateImm(getImm()));
>    }
> -  void addRegOperands(MCInst &Inst, unsigned N) const {
> -    llvm_unreachable("addRegOperands");
> -  }
> +
>    StringRef getToken() const {
>      return StringRef(Tok.Data, Tok.Length);
>    }
> +
> +  void addRegOperands(MCInst &Inst, unsigned N) const {
> +    Inst.addOperand(MCOperand::CreateReg(getReg()));
> +  }
> +
> +  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
> +    if (isReg())
> +      addRegOperands(Inst, N);
> +    else
> +      addImmOperands(Inst, N);
> +  }
> +
> +  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
> +    Inst.addOperand(MCOperand::CreateImm(Reg.Modifiers));
> +    addRegOperands(Inst, N);
> +  }
> +
> +  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
> +    if (isImm())
> +      addImmOperands(Inst, N);
> +    else {
> +      assert(isExpr());
> +      Inst.addOperand(MCOperand::CreateExpr(Expr));
> +    }
> +  }
> +
> +  bool defaultTokenHasSuffix() const {
> +    StringRef Token(Tok.Data, Tok.Length);
> +
> +    return Token.endswith("_e32") || Token.endswith("_e64");
> +  }
> +
>    bool isToken() const override {
>      return Kind == Token;
>    }
> @@ -106,52 +136,369 @@ public:
>      return Kind == Immediate;
>    }
>  
> +  bool isInlineImm() const {
> +    float F = BitsToFloat(Imm.Val);
> +    // TODO: Add 0.5pi for VI
> +    return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) ||
> +           (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
> +           F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0));
> +  }
> +
> +  bool isDSOffset0() const {
> +    assert(isImm());
> +    return Imm.Type == ImmTyDSOffset0;
> +  }
> +
> +  bool isDSOffset1() const {
> +    assert(isImm());
> +    return Imm.Type == ImmTyDSOffset1;
> +  }
> +
>    int64_t getImm() const {
>      return Imm.Val;
>    }
>  
> +  enum ImmTy getImmTy() const {
> +    assert(isImm());
> +    return Imm.Type;
> +  }
> +
>    bool isReg() const override {
> -    return false;
> +    return Kind == Register && Reg.Modifiers == -1;
> +  }
> +
> +  bool isRegWithInputMods() const {
> +    return Kind == Register && Reg.Modifiers != -1;
> +  }
> +
> +  void setModifiers(unsigned Mods) {
> +    assert(isReg());
> +    Reg.Modifiers = Mods;
>    }
>  
>    unsigned getReg() const override {
> -    return 0;
> +    return Reg.RegNo;
> +  }
> +
> +  bool isRegOrImm() const {
> +    return isReg() || isImm();
> +  }
> +
> +  bool isRegClass(unsigned RCID) const {
> +    return Reg.TRI->getRegClass(RCID).contains(getReg());
> +  }
> +
> +  bool isSCSrc32() const {
> +    return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
> +  }
> +
> +  bool isSSrc32() const {
> +    return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
> +  }
> +
> +  bool isSSrc64() const {
> +    return isImm() || isInlineImm() ||
> +           (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
> +  }
> +
> +  bool isVCSrc32() const {
> +    return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
> +  }
> +
> +  bool isVCSrc64() const {
> +    return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
> +  }
> +
> +  bool isVSrc32() const {
> +    return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
> +  }
> +
> +  bool isVSrc64() const {
> +    return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
>    }
>  
>    bool isMem() const override {
>      return false;
>    }
>  
> +  bool isExpr() const {
> +    return Kind == Expression;
> +  }
> +
> +  bool isSoppBrTarget() const {
> +    return isExpr() || isImm();
> +  }
> +
>    SMLoc getStartLoc() const override {
> -    return SMLoc();
> +    return StartLoc;
>    }
>  
>    SMLoc getEndLoc() const override {
> -    return SMLoc();
> +    return EndLoc;
>    }
>  
>    void print(raw_ostream &OS) const override { }
>  
> -  static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) {
> +  static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
> +                                                  enum ImmTy Type = ImmTyNone,
> +                                                  bool IsFPImm = false) {
>      auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
>      Op->Imm.Val = Val;
> +    Op->Imm.IsFPImm = IsFPImm;
> +    Op->Imm.Type = Type;
> +    Op->StartLoc = Loc;
> +    Op->EndLoc = Loc;
>      return Op;
>    }
>  
> -  static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) {
> +  static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc,
> +                                           bool HasExplicitEncodingSize = true) {
>      auto Res = llvm::make_unique<AMDGPUOperand>(Token);
>      Res->Tok.Data = Str.data();
>      Res->Tok.Length = Str.size();
> +    Res->StartLoc = Loc;
> +    Res->EndLoc = Loc;
>      return Res;
>    }
>  
> +  static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
> +                                                  SMLoc E,
> +                                                  const MCRegisterInfo *TRI) {
> +    auto Op = llvm::make_unique<AMDGPUOperand>(Register);
> +    Op->Reg.RegNo = RegNo;
> +    Op->Reg.TRI = TRI;
> +    Op->Reg.Modifiers = -1;
> +    Op->StartLoc = S;
> +    Op->EndLoc = E;
> +    return Op;
> +  }
> +
> +  static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) {
> +    auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
> +    Op->Expr = Expr;
> +    Op->StartLoc = S;
> +    Op->EndLoc = S;
> +    return Op;
> +  }
> +
> +  bool isDSOffset() const;
> +  bool isDSOffset01() const;
>    bool isSWaitCnt() const;
> +  bool isMubufOffset() const;
> +};
> +
> +class AMDGPUAsmParser : public MCTargetAsmParser {
> +  MCSubtargetInfo &STI;
> +  const MCInstrInfo &MII;
> +  MCAsmParser &Parser;
> +
> +  unsigned ForcedEncodingSize;
> +  /// @name Auto-generated Match Functions
> +  /// {
> +
> +#define GET_ASSEMBLER_HEADER
> +#include "AMDGPUGenAsmMatcher.inc"
> +
> +  /// }
> +
> +public:
> +  AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
> +               const MCInstrInfo &_MII,
> +               const MCTargetOptions &Options)
> +      : MCTargetAsmParser(), STI(_STI), MII(_MII), Parser(_Parser),
> +        ForcedEncodingSize(0){
> +
> +    if (!STI.getFeatureBits()) {
> +      // Set default features.
> +      STI.ToggleFeature("SOUTHERN_ISLANDS");
> +    }
> +
> +    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
> +  }
> +
> +  unsigned getForcedEncodingSize() const {
> +    return ForcedEncodingSize;
> +  }
> +
> +  void setForcedEncodingSize(unsigned Size) {
> +    ForcedEncodingSize = Size;
> +  }
> +
> +  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
> +  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
> +  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
> +                               OperandVector &Operands, MCStreamer &Out,
> +                               uint64_t &ErrorInfo,
> +                               bool MatchingInlineAsm) override;
> +  bool ParseDirective(AsmToken DirectiveID) override;
> +  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
> +  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
> +                        SMLoc NameLoc, OperandVector &Operands) override;
> +
> +  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int,
> +                                          int64_t Default = 0);
> +  OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
> +                                          OperandVector &Operands,
> +                                          enum AMDGPUOperand::ImmTy ImmTy =
> +                                                      AMDGPUOperand::ImmTyNone);
> +  OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
> +                                     enum AMDGPUOperand::ImmTy ImmTy =
> +                                                      AMDGPUOperand::ImmTyNone);
> +  OperandMatchResultTy parseOptionalOps(
> +                                   const ArrayRef<OptionalOperand> &OptionalOps,
> +                                   OperandVector &Operands);
> +
> +
> +  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
> +  void cvtDS(MCInst &Inst, const OperandVector &Operands);
> +  OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands);
> +  OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands);
> +  OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands);
> +
> +  bool parseCnt(int64_t &IntVal);
> +  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
> +  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
> +
> +  void cvtMubuf(MCInst &Inst, const OperandVector &Operands);
> +  OperandMatchResultTy parseOffset(OperandVector &Operands);
> +  OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands);
> +  OperandMatchResultTy parseGLC(OperandVector &Operands);
> +  OperandMatchResultTy parseSLC(OperandVector &Operands);
> +  OperandMatchResultTy parseTFE(OperandVector &Operands);
> +
> +  OperandMatchResultTy parseDMask(OperandVector &Operands);
> +  OperandMatchResultTy parseUNorm(OperandVector &Operands);
> +  OperandMatchResultTy parseR128(OperandVector &Operands);
> +
> +  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
> +  OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands);
> +};
> +
> +struct OptionalOperand {
> +  const char *Name;
> +  AMDGPUOperand::ImmTy Type;
> +  bool IsBit;
> +  int64_t Default;
> +  bool (*ConvertResult)(int64_t&);
>  };
>  
>  }
>  
> +static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
> +  if (IsVgpr) {
> +    switch (RegWidth) {
> +      default: llvm_unreachable("Unknown register width");
> +      case 1: return AMDGPU::VGPR_32RegClassID;
> +      case 2: return AMDGPU::VReg_64RegClassID;
> +      case 3: return AMDGPU::VReg_96RegClassID;
> +      case 4: return AMDGPU::VReg_128RegClassID;
> +      case 8: return AMDGPU::VReg_256RegClassID;
> +      case 16: return AMDGPU::VReg_512RegClassID;
> +    }
> +  }
> +
> +  switch (RegWidth) {
> +    default: llvm_unreachable("Unknown register width");
> +    case 1: return AMDGPU::SGPR_32RegClassID;
> +    case 2: return AMDGPU::SGPR_64RegClassID;
> +    case 4: return AMDGPU::SReg_128RegClassID;
> +    case 8: return AMDGPU::SReg_256RegClassID;
> +    case 16: return AMDGPU::SReg_512RegClassID;
> +  }
> +}
> +
> +static unsigned getRegForName(const StringRef &RegName) {
> +
> +  return StringSwitch<unsigned>(RegName)
> +    .Case("exec", AMDGPU::EXEC)
> +    .Case("vcc", AMDGPU::VCC)
> +    .Case("flat_scr", AMDGPU::FLAT_SCR)
> +    .Case("m0", AMDGPU::M0)
> +    .Case("scc", AMDGPU::SCC)
> +    .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
> +    .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
> +    .Case("vcc_lo", AMDGPU::VCC_LO)
> +    .Case("vcc_hi", AMDGPU::VCC_HI)
> +    .Case("exec_lo", AMDGPU::EXEC_LO)
> +    .Case("exec_hi", AMDGPU::EXEC_HI)
> +    .Default(0);
> +}
> +
>  bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
> -  return true;
> +  const AsmToken Tok = Parser.getTok();
> +  StartLoc = Tok.getLoc();
> +  EndLoc = Tok.getEndLoc();
> +  const StringRef &RegName = Tok.getString();
> +  RegNo = getRegForName(RegName);
> +
> +  if (RegNo) {
> +    Parser.Lex();
> +    return false;
> +  }
> +
> +  // Match vgprs and sgprs
> +  if (RegName[0] != 's' && RegName[0] != 'v')
> +    return true;
> +
> +  bool IsVgpr = RegName[0] == 'v';
> +  unsigned RegWidth;
> +  unsigned RegIndexInClass;
> +  if (RegName.size() > 1) {
> +    // We have a 32-bit register
> +    RegWidth = 1;
> +    if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
> +      return true;
> +    Parser.Lex();
> +  } else {
> +    // We have a register larger than 32 bits.
> +
> +    int64_t RegLo, RegHi;
> +    Parser.Lex();
> +    if (getLexer().isNot(AsmToken::LBrac))
> +      return true;
> +
> +    Parser.Lex();
> +    if (getParser().parseAbsoluteExpression(RegLo))
> +      return true;
> +
> +    if (getLexer().isNot(AsmToken::Colon))
> +      return true;
> +
> +    Parser.Lex();
> +    if (getParser().parseAbsoluteExpression(RegHi))
> +      return true;
> +
> +    if (getLexer().isNot(AsmToken::RBrac))
> +      return true;
> +
> +    Parser.Lex();
> +    RegWidth = (RegHi - RegLo) + 1;
> +    if (IsVgpr) {
> +      // VGPR registers aren't aligned.
> +      RegIndexInClass = RegLo;
> +    } else {
> +      // SGPR registers are aligned.  Max alignment is 4 dwords.
> +      RegIndexInClass = RegLo / std::min(RegWidth, 4u);
> +    }
> +  }
> +
> +  const MCRegisterInfo *TRC = getContext().getRegisterInfo();
> +  unsigned RC = getRegClass(IsVgpr, RegWidth);
> +  if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs())
> +    return true;
> +  RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
> +  return false;
> +}
> +
> +unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
> +
> +  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
> +
> +  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
> +      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
> +    return Match_InvalidOperand;
> +
> +  return Match_Success;
>  }
>  
>  
> @@ -163,22 +510,30 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
>    MCInst Inst;
>  
>    switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
> -  case Match_Success:
> -    Inst.setLoc(IDLoc);
> -    Out.EmitInstruction(Inst, STI);
> -    return false;
> -  case Match_MissingFeature:
> -    return Error(IDLoc, "instruction use requires an option to be enabled");
> -  case Match_MnemonicFail:
> -    return Error(IDLoc, "unrecognized instruction mnemonic");
> -  case Match_InvalidOperand: {
> -    if (ErrorInfo != ~0ULL) {
> -      if (ErrorInfo >= Operands.size())
> -        return Error(IDLoc, "too few operands for instruction");
> +    default: break;
> +    case Match_Success:
> +      Inst.setLoc(IDLoc);
> +      Out.EmitInstruction(Inst, STI);
> +      return false;
> +    case Match_MissingFeature:
> +      return Error(IDLoc, "missing feature");
> +
> +    case Match_MnemonicFail:
> +      return Error(IDLoc, "unrecognized instruction mnemonic");
>  
> +    case Match_InvalidOperand: {
> +      SMLoc ErrorLoc = IDLoc;
> +      if (ErrorInfo != ~0ULL) {
> +        if (ErrorInfo >= Operands.size()) {
> +          return Error(IDLoc, "too few operands for instruction");
> +        }
> +
> +        ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
> +        if (ErrorLoc == SMLoc())
> +          ErrorLoc = IDLoc;
> +      }
> +      return Error(ErrorLoc, "invalid operand for instruction");
>      }
> -    return Error(IDLoc, "invalid operand for instruction");
> -  }
>    }
>    llvm_unreachable("Implement any new match types added!");
>  }
> @@ -187,6 +542,19 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
>    return true;
>  }
>  
> +static bool operandsHaveModifiers(const OperandVector &Operands) {
> +
> +  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
> +    const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
> +    if (Op.isRegWithInputMods())
> +      return true;
> +    if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod ||
> +                       Op.getImmTy() == AMDGPUOperand::ImmTyClamp))
> +      return true;
> +  }
> +  return false;
> +}
> +
>  AMDGPUAsmParser::OperandMatchResultTy
>  AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>  
> @@ -195,17 +563,104 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>  
>    // If we successfully parsed the operand or if there was an error parsing,
>    // we are done.
> -  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
> +  //
> +  // If we are parsing after we reach EndOfStatement then this means we
> +  // are appending default values to the Operands list.  This is only done
> +  // by custom parser, so we shouldn't continue on to the generic parsing.
> +  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
> +      getLexer().is(AsmToken::EndOfStatement))
>      return ResTy;
>  
> +  bool Negate = false, Abs = false;
> +  if (getLexer().getKind()== AsmToken::Minus) {
> +    Parser.Lex();
> +    Negate = true;
> +  }
> +
> +  if (getLexer().getKind() == AsmToken::Pipe) {
> +    Parser.Lex();
> +    Abs = true;
> +  }
> +
>    switch(getLexer().getKind()) {
>      case AsmToken::Integer: {
> +      SMLoc S = Parser.getTok().getLoc();
>        int64_t IntVal;
>        if (getParser().parseAbsoluteExpression(IntVal))
>          return MatchOperand_ParseFail;
> -      Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
> +      APInt IntVal32(32, IntVal);
> +      if (IntVal32.getSExtValue() != IntVal) {
> +        Error(S, "invalid immediate: only 32-bit values are legal");
> +        return MatchOperand_ParseFail;
> +      }
> +
> +      IntVal = IntVal32.getSExtValue();
> +      if (Negate)
> +        IntVal *= -1;
> +      Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
>        return MatchOperand_Success;
>      }
> +    case AsmToken::Real: {
> +      // FIXME: We should emit an error if a double precision floating-point
> +      // value is used.  I'm not sure the best way to detect this.
> +      SMLoc S = Parser.getTok().getLoc();
> +      int64_t IntVal;
> +      if (getParser().parseAbsoluteExpression(IntVal))
> +        return MatchOperand_ParseFail;
> +
> +      APFloat F((float)BitsToDouble(IntVal));
> +      if (Negate)
> +        F.changeSign();
> +      Operands.push_back(
> +          AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
> +      return MatchOperand_Success;
> +    }
> +    case AsmToken::Identifier: {
> +      SMLoc S, E;
> +      unsigned RegNo;
> +      if (!ParseRegister(RegNo, S, E)) {
> +
> +        bool HasModifiers = operandsHaveModifiers(Operands);
> +        unsigned Modifiers = 0;
> +
> +        if (Negate)
> +          Modifiers |= 0x1;
> +
> +        if (Abs) {
> +          if (getLexer().getKind() != AsmToken::Pipe)
> +            return MatchOperand_ParseFail;
> +          Parser.Lex();
> +          Modifiers |= 0x2;
> +        }
> +
> +        if (Modifiers && !HasModifiers) {
> +          // We are adding a modifier to src1 or src2 and previous sources
> +          // don't have modifiers, so we need to go back and set empty modifiers
> +          // for each previous source.
> +          for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1;
> +               --PrevRegIdx) {
> +
> +            AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]);
> +            RegOp.setModifiers(0);
> +          }
> +        }
> +
> +
> +        Operands.push_back(AMDGPUOperand::CreateReg(
> +            RegNo, S, E, getContext().getRegisterInfo()));
> +
> +        if (HasModifiers || Modifiers) {
> +          AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]);
> +          RegOp.setModifiers(Modifiers);
> +
> +        }
> +     }  else {
> +      Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(),
> +                                                    S));
> +      Parser.Lex();
> +     }
> +     return MatchOperand_Success;
> +    }
>      default:
>        return MatchOperand_NoMatch;
>    }
> @@ -214,23 +669,283 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>  bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
>                                         StringRef Name,
>                                         SMLoc NameLoc, OperandVector &Operands) {
> +
> +  // Clear any forced encodings from the previous instruction.
> +  setForcedEncodingSize(0);
> +
> +  if (Name.endswith("_e64"))
> +    setForcedEncodingSize(64);
> +  else if (Name.endswith("_e32"))
> +    setForcedEncodingSize(32);
> +
>    // Add the instruction mnemonic
>    Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
>  
> -  if (getLexer().is(AsmToken::EndOfStatement))
> -    return false;
> +  while (!getLexer().is(AsmToken::EndOfStatement)) {
> +    AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
>  
> -  AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
> -  switch (Res) {
> -    case MatchOperand_Success: return false;
> -    case MatchOperand_ParseFail: return Error(NameLoc,
> -                                              "Failed parsing operand");
> -    case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand");
> +    // Eat the comma or space if there is one.
> +    if (getLexer().is(AsmToken::Comma))
> +      Parser.Lex();
> +
> +    switch (Res) {
> +      case MatchOperand_Success: break;
> +      case MatchOperand_ParseFail: return Error(getLexer().getLoc(),
> +                                                "failed parsing operand.");
> +      case MatchOperand_NoMatch: return Error(getLexer().getLoc(),
> +                                              "not a valid operand.");
> +    }
>    }
> -  return true;
> +
> +  // Once we reach end of statement, continue parsing so we can add default
> +  // values for optional arguments.
> +  AMDGPUAsmParser::OperandMatchResultTy Res;
> +  while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) {
> +    if (Res != MatchOperand_Success)
> +      return Error(getLexer().getLoc(), "failed parsing operand.");
> +  }
> +  return false;
>  }
>  
>  //===----------------------------------------------------------------------===//
> +// Utility functions
> +//===----------------------------------------------------------------------===//
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int,
> +                                    int64_t Default) {
> +
> +  // We are at the end of the statement, and this is a default argument, so
> +  // use a default value.
> +  if (getLexer().is(AsmToken::EndOfStatement)) {
> +    Int = Default;
> +    return MatchOperand_Success;
> +  }
> +
> +  switch(getLexer().getKind()) {
> +    default: return MatchOperand_NoMatch;
> +    case AsmToken::Identifier: {
> +      StringRef OffsetName = Parser.getTok().getString();
> +      if (!OffsetName.equals(Prefix))
> +        return MatchOperand_NoMatch;
> +
> +      Parser.Lex();
> +      if (getLexer().isNot(AsmToken::Colon))
> +        return MatchOperand_ParseFail;
> +
> +      Parser.Lex();
> +      if (getLexer().isNot(AsmToken::Integer))
> +        return MatchOperand_ParseFail;
> +
> +      if (getParser().parseAbsoluteExpression(Int))
> +        return MatchOperand_ParseFail;
> +      break;
> +    }
> +  }
> +  return MatchOperand_Success;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
> +                                    enum AMDGPUOperand::ImmTy ImmTy) {
> +
> +  SMLoc S = Parser.getTok().getLoc();
> +  int64_t Offset = 0;
> +
> +  AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset);
> +  if (Res != MatchOperand_Success)
> +    return Res;
> +
> +  Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy));
> +  return MatchOperand_Success;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
> +                               enum AMDGPUOperand::ImmTy ImmTy) {
> +  int64_t Bit = 0;
> +  SMLoc S = Parser.getTok().getLoc();
> +
> +  // If we are at the end of the statement, this named bit was omitted, so
> +  // fall through and use the default value; otherwise try to parse it.
> +  if (getLexer().isNot(AsmToken::EndOfStatement)) {
> +    switch(getLexer().getKind()) {
> +      case AsmToken::Identifier: {
> +        StringRef Tok = Parser.getTok().getString();
> +        if (Tok == Name) {
> +          Bit = 1;
> +          Parser.Lex();
> +        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
> +          Bit = 0;
> +          Parser.Lex();
> +        } else {
> +          return MatchOperand_NoMatch;
> +        }
> +        break;
> +      }
> +      default:
> +        return MatchOperand_NoMatch;
> +    }
> +  }
> +
> +  Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
> +  return MatchOperand_Success;
> +}
> +
> +static bool operandsHasOptionalOp(const OperandVector &Operands,
> +                                  const OptionalOperand &OOp) {
> +  for (unsigned i = 0; i < Operands.size(); i++) {
> +    const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]);
> +    if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) ||
> +        (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name))
> +      return true;
> +
> +  }
> +  return false;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
> +                                   OperandVector &Operands) {
> +  SMLoc S = Parser.getTok().getLoc();
> +  for (const OptionalOperand &Op : OptionalOps) {
> +    if (operandsHasOptionalOp(Operands, Op))
> +      continue;
> +    AMDGPUAsmParser::OperandMatchResultTy Res;
> +    int64_t Value;
> +    if (Op.IsBit) {
> +      Res = parseNamedBit(Op.Name, Operands, Op.Type);
> +      if (Res == MatchOperand_NoMatch)
> +        continue;
> +      return Res;
> +    }
> +
> +    Res = parseIntWithPrefix(Op.Name, Value, Op.Default);
> +
> +    if (Res == MatchOperand_NoMatch)
> +      continue;
> +
> +    if (Res != MatchOperand_Success)
> +      return Res;
> +
> +    if (Op.ConvertResult && !Op.ConvertResult(Value)) {
> +      return MatchOperand_ParseFail;
> +    }
> +
> +    Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type));
> +    return MatchOperand_Success;
> +  }
> +  return MatchOperand_NoMatch;
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// ds
> +//===----------------------------------------------------------------------===//
> +
> +static const OptionalOperand DSOptionalOps [] = {
> +  {"offset",  AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
> +  {"gds",     AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
> +};
> +
> +static const OptionalOperand DSOptionalOpsOff01 [] = {
> +  {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr},
> +  {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr},
> +  {"gds",     AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
> +};
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) {
> +  return parseOptionalOps(DSOptionalOps, Operands);
> +}
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) {
> +  return parseOptionalOps(DSOptionalOpsOff01, Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) {
> +  SMLoc S = Parser.getTok().getLoc();
> +  AMDGPUAsmParser::OperandMatchResultTy Res =
> +    parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
> +  if (Res == MatchOperand_NoMatch) {
> +    Operands.push_back(AMDGPUOperand::CreateImm(0, S,
> +                       AMDGPUOperand::ImmTyOffset));
> +    Res = MatchOperand_Success;
> +  }
> +  return Res;
> +}
> +
> +bool AMDGPUOperand::isDSOffset() const {
> +  return isImm() && isUInt<16>(getImm());
> +}
> +
> +bool AMDGPUOperand::isDSOffset01() const {
> +  return isImm() && isUInt<8>(getImm());
> +}
> +
> +void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
> +                                    const OperandVector &Operands) {
> +
> +  std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> +
> +  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
> +    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> +    // Add the register arguments
> +    if (Op.isReg()) {
> +      Op.addRegOperands(Inst, 1);
> +      continue;
> +    }
> +
> +    // Handle optional arguments
> +    OptionalIdx[Op.getImmTy()] = i;
> +  }
> +
> +  unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0];
> +  unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1];
> +  unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
> +
> +  ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0
> +  ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1
> +  ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
> +  Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0
> +}
> +
> +void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
> +
> +  std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> +  bool GDSOnly = false;
> +
> +  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
> +    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> +    // Add the register arguments
> +    if (Op.isReg()) {
> +      Op.addRegOperands(Inst, 1);
> +      continue;
> +    }
> +
> +    if (Op.isToken() && Op.getToken() == "gds") {
> +      GDSOnly = true;
> +      continue;
> +    }
> +
> +    // Handle optional arguments
> +    OptionalIdx[Op.getImmTy()] = i;
> +  }
> +
> +  unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
> +  ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset
> +
> +  if (!GDSOnly) {
> +    unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
> +    ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
> +  }
> +  Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0
> +}
> +
> +
> +//===----------------------------------------------------------------------===//
>  // s_waitcnt
>  //===----------------------------------------------------------------------===//
>  
> @@ -284,6 +999,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
>    // expcnt  [6:4]
>    // lgkmcnt [10:8]
>    int64_t CntVal = 0x77f;
> +  SMLoc S = Parser.getTok().getLoc();
>  
>    switch(getLexer().getKind()) {
>      default: return MatchOperand_ParseFail;
> @@ -300,7 +1016,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
>        } while(getLexer().isNot(AsmToken::EndOfStatement));
>        break;
>    }
> -  Operands.push_back(AMDGPUOperand::CreateImm(CntVal));
> +  Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
>    return MatchOperand_Success;
>  }
>  
> @@ -308,6 +1024,245 @@ bool AMDGPUOperand::isSWaitCnt() const {
>    return isImm();
>  }
>  
> +//===----------------------------------------------------------------------===//
> +// sopp branch targets
> +//===----------------------------------------------------------------------===//
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
> +  SMLoc S = Parser.getTok().getLoc();
> +
> +  switch (getLexer().getKind()) {
> +    default: return MatchOperand_ParseFail;
> +    case AsmToken::Integer: {
> +      int64_t Imm;
> +      if (getParser().parseAbsoluteExpression(Imm))
> +        return MatchOperand_ParseFail;
> +      Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
> +      return MatchOperand_Success;
> +    }
> +
> +    case AsmToken::Identifier:
> +      Operands.push_back(AMDGPUOperand::CreateExpr(
> +          MCSymbolRefExpr::Create(getContext().GetOrCreateSymbol(
> +                                  Parser.getTok().getString()), getContext()), S));
> +      Parser.Lex();
> +      return MatchOperand_Success;
> +  }
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// mubuf
> +//===----------------------------------------------------------------------===//
> +
> +static const OptionalOperand MubufOptionalOps [] = {
> +  {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
> +  {"glc",    AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
> +  {"slc",    AMDGPUOperand::ImmTySLC, true, 0, nullptr},
> +  {"tfe",    AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
> +};
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) {
> +  return parseOptionalOps(MubufOptionalOps, Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseOffset(OperandVector &Operands) {
> +  return parseIntWithPrefix("offset", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseGLC(OperandVector &Operands) {
> +  return parseNamedBit("glc", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseSLC(OperandVector &Operands) {
> +  return parseNamedBit("slc", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseTFE(OperandVector &Operands) {
> +  return parseNamedBit("tfe", Operands);
> +}
> +
> +bool AMDGPUOperand::isMubufOffset() const {
> +  return isImm() && isUInt<12>(getImm());
> +}
> +
> +void AMDGPUAsmParser::cvtMubuf(MCInst &Inst,
> +                               const OperandVector &Operands) {
> +  std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> +
> +  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
> +    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> +    // Add the register arguments
> +    if (Op.isReg()) {
> +      Op.addRegOperands(Inst, 1);
> +      continue;
> +    }
> +
> +    // Handle the case where soffset is an immediate
> +    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
> +      Op.addImmOperands(Inst, 1);
> +      continue;
> +    }
> +
> +    // Handle tokens like 'offen' which are sometimes hard-coded into the
> +    // asm string.  There are no MCInst operands for these.
> +    if (Op.isToken()) {
> +      continue;
> +    }
> +    assert(Op.isImm());
> +
> +    // Handle optional arguments
> +    OptionalIdx[Op.getImmTy()] = i;
> +  }
> +
> +  assert(OptionalIdx.size() == 4);
> +
> +  unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
> +  unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC];
> +  unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC];
> +  unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE];
> +
> +  ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1);
> +  ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1);
> +  ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1);
> +  ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1);
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// mimg
> +//===----------------------------------------------------------------------===//
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDMask(OperandVector &Operands) {
> +  return parseIntWithPrefix("dmask", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseUNorm(OperandVector &Operands) {
> +  return parseNamedBit("unorm", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseR128(OperandVector &Operands) {
> +  return parseNamedBit("r128", Operands);
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// vop3
> +//===----------------------------------------------------------------------===//
> +
> +static bool ConvertOmodMul(int64_t &Mul) {
> +  if (Mul != 1 && Mul != 2 && Mul != 4)
> +    return false;
> +
> +  Mul >>= 1;
> +  return true;
> +}
> +
> +static bool ConvertOmodDiv(int64_t &Div) {
> +  if (Div == 1) {
> +    Div = 0;
> +    return true;
> +  }
> +
> +  if (Div == 2) {
> +    Div = 3;
> +    return true;
> +  }
> +
> +  return false;
> +}
> +
> +static const OptionalOperand VOP3OptionalOps [] = {
> +  {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr},
> +  {"mul",   AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul},
> +  {"div",   AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv},
> +};
> +
> +static bool isVOP3(OperandVector &Operands) {
> +  if (operandsHaveModifiers(Operands))
> +    return true;
> +
> +  AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]);
> +
> +  if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID))
> +    return true;
> +
> +  if (Operands.size() >= 5)
> +    return true;
> +
> +  if (Operands.size() > 3) {
> +    AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]);
> +    if (Src1Op.getReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) ||
> +                            Src1Op.isRegClass(AMDGPU::SReg_64RegClassID)))
> +      return true;
> +  }
> +  return false;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
> +
> +  // The value returned by this function may change after parsing
> +  // an operand so store the original value here.
> +  bool HasModifiers = operandsHaveModifiers(Operands);
> +
> +  bool IsVOP3 = isVOP3(Operands);
> +  if (HasModifiers || IsVOP3 ||
> +      getLexer().isNot(AsmToken::EndOfStatement) ||
> +      getForcedEncodingSize() == 64) {
> +
> +    AMDGPUAsmParser::OperandMatchResultTy Res =
> +        parseOptionalOps(VOP3OptionalOps, Operands);
> +
> +    if (!HasModifiers && Res == MatchOperand_Success) {
> +      // We have added a modifier operation, so we need to make sure all
> +      // previous register operands have modifiers
> +      for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
> +        AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
> +        if (Op.isReg())
> +          Op.setModifiers(0);
> +      }
> +    }
> +    return Res;
> +  }
> +  return MatchOperand_NoMatch;
> +}
> +
> +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
> +  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
> +  unsigned i = 2;
> +
> +  std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> +
> +  if (operandsHaveModifiers(Operands)) {
> +    for (unsigned e = Operands.size(); i != e; ++i) {
> +      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> +      if (Op.isRegWithInputMods()) {
> +        ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2);
> +        continue;
> +      }
> +      OptionalIdx[Op.getImmTy()] = i;
> +    }
> +
> +    unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp];
> +    unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod];
> +
> +    ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1);
> +    ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1);
> +  } else {
> +    for (unsigned e = Operands.size(); i != e; ++i)
> +      ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1);
> +  }
> +}
> +
>  /// Force static initialization.
>  extern "C" void LLVMInitializeR600AsmParser() {
>    RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> index c7f9da6..bdad818 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -127,7 +127,8 @@ void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
>      O << " tfe";
>  }
>  
> -void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
> +void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
> +                                        const MCRegisterInfo &MRI) {
>    switch (reg) {
>    case AMDGPU::VCC:
>      O << "vcc";
> @@ -297,7 +298,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
>        break;
>  
>      default:
> -      printRegOperand(Op.getReg(), O);
> +      printRegOperand(Op.getReg(), O, MRI);
>        break;
>      }
>    } else if (Op.isImm()) {
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> index 5289718..b82e388 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> @@ -30,6 +30,8 @@ public:
>    static const char *getRegisterName(unsigned RegNo);
>  
>    void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
> +  static void printRegOperand(unsigned RegNo, raw_ostream &O,
> +                              const MCRegisterInfo &MRI);
>  
>  private:
>    void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index af38c94..e2197fe 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -2082,3 +2082,38 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
>    return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
>                              cast<RegisterSDNode>(VReg)->getReg(), VT);
>  }
> +
> +//===----------------------------------------------------------------------===//
> +//                         SI Inline Assembly Support
> +//===----------------------------------------------------------------------===//
> +
> +std::pair<unsigned, const TargetRegisterClass *>
> +SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
> +                                               const std::string &Constraint,
> +                                               MVT VT) const {
> +  if (Constraint == "r") {
> +    switch(VT.SimpleTy) {
> +      default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
> +      case MVT::i64:
> +        return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
> +      case MVT::i32:
> +        return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
> +    }
> +  }
> +
> +  if (Constraint.size() > 1) {
> +    const TargetRegisterClass *RC = nullptr;
> +    if (Constraint[1] == 'v') {
> +      RC = &AMDGPU::VGPR_32RegClass;
> +    } else if (Constraint[1] == 's') {
> +      RC = &AMDGPU::SGPR_32RegClass;
> +    }
> +
> +    if (RC) {
> +      unsigned Idx = std::atoi(Constraint.substr(2).c_str());
> +      if (Idx < RC->getNumRegs())
> +        return std::make_pair(RC->getRegister(Idx), RC);
> +    }
> +  }
> +  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
> +}
> diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
> index 92f5847..168de4c 100644
> --- a/lib/Target/R600/SIISelLowering.h
> +++ b/lib/Target/R600/SIISelLowering.h
> @@ -113,6 +113,10 @@ public:
>    MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
>                                    SDLoc DL,
>                                    SDValue Ptr) const;
> +
> +  std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
> +                                   const TargetRegisterInfo *TRI,
> +                                   const std::string &Constraint, MVT VT) const;
>  };
>  
>  } // End namespace llvm
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index e7a07a1..bc693c3 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -130,6 +130,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
>    let AddedComplexity = -1000;
>  
>    let VOP3 = 1;
> +  let VALU = 1;
> +
> +  let AsmMatchConverter = "cvtVOP3";
> +  let isCodeGenOnly = 0;
> +
>    int Size = 8;
>  }
>  
> @@ -221,6 +226,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
>    let mayLoad = 0;
>    let mayStore = 0;
>    let hasSideEffects = 0;
> +  let isCodeGenOnly = 0;
>    let SALU = 1;
>    let SOP1 = 1;
>  }
> @@ -231,6 +237,7 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
>    let mayLoad = 0;
>    let mayStore = 0;
>    let hasSideEffects = 0;
> +  let isCodeGenOnly = 0;
>    let SALU = 1;
>    let SOP2 = 1;
>  
> @@ -246,6 +253,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
>    let hasSideEffects = 0;
>    let SALU = 1;
>    let SOPC = 1;
> +  let isCodeGenOnly = 0;
>  
>    let UseNamedOperandTable = 1;
>  }
> @@ -563,10 +571,14 @@ let Uses = [EXEC] in {
>  
>  class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
>      VOP1Common <outs, ins, asm, pattern>,
> -    VOP1e<op>;
> +    VOP1e<op> {
> +  let isCodeGenOnly = 0;
> +}
>  
>  class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
> -    VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
> +    VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
> +  let isCodeGenOnly = 0;
> +}
>  
>  class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
>      VOPCCommon <ins, asm, pattern>, VOPCe <op>;
> @@ -599,6 +611,7 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
>    let mayStore = 1;
>  
>    let hasSideEffects = 0;
> +  let AsmMatchConverter = "cvtDS";
>    let SchedRW = [WriteLDS];
>  }
>  
> @@ -611,6 +624,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
>  
>    let hasSideEffects = 0;
>    let UseNamedOperandTable = 1;
> +  let AsmMatchConverter = "cvtMubuf";
>    let SchedRW = [WriteVMEM];
>  }
>  
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 345e699..9091b19 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -6,6 +6,15 @@
>  // License. See LICENSE.TXT for details.
>  //
>  //===----------------------------------------------------------------------===//
> +def isSICI : Predicate<
> +  "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
> +  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
> +>, AssemblerPredicate<"FeatureGCN1Encoding">;
> +def isCI : Predicate<"Subtarget->getGeneration() "
> +                      ">= AMDGPUSubtarget::SEA_ISLANDS">;
> +def isVI : Predicate <
> +  "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
> +  AssemblerPredicate<"FeatureGCN3Encoding">;
>  
>  class vop {
>    field bits<9> SI3;
> @@ -233,14 +242,88 @@ def FRAMEri32 : Operand<iPTR> {
>    let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
>  }
>  
> +def SoppBrTarget : AsmOperandClass {
> +  let Name = "SoppBrTarget";
> +  let ParserMethod = "parseSOppBrTarget";
> +}
> +
>  def sopp_brtarget : Operand<OtherVT> {
>    let EncoderMethod = "getSOPPBrEncoding";
>    let OperandType = "OPERAND_PCREL";
> +  let ParserMatchClass = SoppBrTarget;
>  }
>  
>  include "SIInstrFormats.td"
>  include "VIInstrFormats.td"
>  
> +def MubufOffsetMatchClass : AsmOperandClass {
> +  let Name = "MubufOffset";
> +  let ParserMethod = "parseMubufOptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +class DSOffsetBaseMatchClass <string parser> : AsmOperandClass {
> +  let Name = "DSOffset"#parser;
> +  let ParserMethod = parser;
> +  let RenderMethod = "addImmOperands";
> +  let PredicateMethod = "isDSOffset";
> +}
> +
> +def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">;
> +def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">;
> +
> +def DSOffset01MatchClass : AsmOperandClass {
> +  let Name = "DSOffset1";
> +  let ParserMethod = "parseDSOff01OptionalOps";
> +  let RenderMethod = "addImmOperands";
> +  let PredicateMethod = "isDSOffset01";
> +}
> +
> +class GDSBaseMatchClass <string parser> : AsmOperandClass {
> +  let Name = "GDS"#parser;
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = parser;
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">;
> +def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
> +
> +def GLCMatchClass : AsmOperandClass {
> +  let Name = "GLC";
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = "parseMubufOptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +def SLCMatchClass : AsmOperandClass {
> +  let Name = "SLC";
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = "parseMubufOptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +def TFEMatchClass : AsmOperandClass {
> +  let Name = "TFE";
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = "parseMubufOptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +def OModMatchClass : AsmOperandClass {
> +  let Name = "OMod";
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = "parseVOP3OptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
> +def ClampMatchClass : AsmOperandClass {
> +  let Name = "Clamp";
> +  let PredicateMethod = "isImm";
> +  let ParserMethod = "parseVOP3OptionalOps";
> +  let RenderMethod = "addImmOperands";
> +}
> +
>  let OperandType = "OPERAND_IMMEDIATE" in {
>  
>  def offen : Operand<i1> {
> @@ -254,35 +337,52 @@ def addr64 : Operand<i1> {
>  }
>  def mbuf_offset : Operand<i16> {
>    let PrintMethod = "printMBUFOffset";
> +  let ParserMatchClass = MubufOffsetMatchClass;
>  }
> -def ds_offset : Operand<i16> {
> +class ds_offset_base <AsmOperandClass mc> : Operand<i16> {
>    let PrintMethod = "printDSOffset";
> +  let ParserMatchClass = mc;
>  }
> +def ds_offset : ds_offset_base <DSOffsetMatchClass>;
> +def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>;
> +
>  def ds_offset0 : Operand<i8> {
>    let PrintMethod = "printDSOffset0";
> +  let ParserMatchClass = DSOffset01MatchClass;
>  }
>  def ds_offset1 : Operand<i8> {
>    let PrintMethod = "printDSOffset1";
> +  let ParserMatchClass = DSOffset01MatchClass;
>  }
> -def gds : Operand <i1> {
> +class gds_base <AsmOperandClass mc> : Operand <i1> {
>    let PrintMethod = "printGDS";
> +  let ParserMatchClass = mc;
>  }
> +def gds : gds_base <GDSMatchClass>;
> +
> +def gds01 : gds_base <GDS01MatchClass>;
> +
>  def glc : Operand <i1> {
>    let PrintMethod = "printGLC";
> +  let ParserMatchClass = GLCMatchClass;
>  }
>  def slc : Operand <i1> {
>    let PrintMethod = "printSLC";
> +  let ParserMatchClass = SLCMatchClass;
>  }
>  def tfe : Operand <i1> {
>    let PrintMethod = "printTFE";
> +  let ParserMatchClass = TFEMatchClass;
>  }
>  
>  def omod : Operand <i32> {
>    let PrintMethod = "printOModSI";
> +  let ParserMatchClass = OModMatchClass;
>  }
>  
>  def ClampMod : Operand <i1> {
>    let PrintMethod = "printClampSI";
> +  let ParserMatchClass = ClampMatchClass;
>  }
>  
>  } // End OperandType = "OPERAND_IMMEDIATE"
> @@ -391,12 +491,18 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
>  class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
>    SOP1 <outs, ins, asm, []>,
>    SOP1e <op.SI>,
> -  SIMCInstr<opName, SISubtarget.SI>;
> +  SIMCInstr<opName, SISubtarget.SI> {
> +  let isCodeGenOnly = 0;
> +  let AssemblerPredicates = [isSICI];
> +}
>  
>  class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
>    SOP1 <outs, ins, asm, []>,
>    SOP1e <op.VI>,
> -  SIMCInstr<opName, SISubtarget.VI>;
> +  SIMCInstr<opName, SISubtarget.VI> {
> +  let isCodeGenOnly = 0;
> +  let AssemblerPredicates = [isVI];
> +}
>  
>  multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm,
>                     list<dag> pattern> {
> @@ -472,12 +578,16 @@ class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
>  class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
>    SOP2<outs, ins, asm, []>,
>    SOP2e<op.SI>,
> -  SIMCInstr<opName, SISubtarget.SI>;
> +  SIMCInstr<opName, SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +}
>  
>  class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
>    SOP2<outs, ins, asm, []>,
>    SOP2e<op.VI>,
> -  SIMCInstr<opName, SISubtarget.VI>;
> +  SIMCInstr<opName, SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +}
>  
>  multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
>    def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
> @@ -539,12 +649,18 @@ class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
>  class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
>    SOPK <outs, ins, asm, []>,
>    SOPKe <op.SI>,
> -  SIMCInstr<opName, SISubtarget.SI>;
> +  SIMCInstr<opName, SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +  let isCodeGenOnly = 0;
> +}
>  
>  class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
>    SOPK <outs, ins, asm, []>,
>    SOPKe <op.VI>,
> -  SIMCInstr<opName, SISubtarget.VI>;
> +  SIMCInstr<opName, SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +  let isCodeGenOnly = 0;
> +}
>  
>  multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm,
>                     string asm = opName#opAsm> {
> @@ -619,13 +735,17 @@ class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
>                      string asm> :
>    SMRD <outs, ins, asm, []>,
>    SMRDe <op, imm>,
> -  SIMCInstr<opName, SISubtarget.SI>;
> +  SIMCInstr<opName, SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +}
>  
>  class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
>                      string asm> :
>    SMRD <outs, ins, asm, []>,
>    SMEMe_vi <op, imm>,
> -  SIMCInstr<opName, SISubtarget.VI>;
> +  SIMCInstr<opName, SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +}
>  
>  multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
>                     string asm, list<dag> pattern> {
> @@ -664,8 +784,14 @@ multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
>  def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
>    let PrintMethod = "printOperandAndMods";
>  }
> +
> +def InputModsMatchClass : AsmOperandClass {
> +  let Name = "RegWithInputMods";
> +}
> +
>  def InputModsNoDefault : Operand <i32> {
>    let PrintMethod = "printOperandAndMods";
> +  let ParserMatchClass = InputModsMatchClass;
>  }
>  
>  class getNumSrcArgs<ValueType Src1, ValueType Src2> {
> @@ -873,7 +999,8 @@ class AtomicNoRet <string noRetOp, bit isRet> {
>  class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>    VOP1Common <outs, ins, "", pattern>,
>    VOP <opName>,
> -  SIMCInstr <opName#"_e32", SISubtarget.NONE> {
> +  SIMCInstr <opName#"_e32", SISubtarget.NONE>,
> +  MnemonicAlias<opName#"_e32", opName> {
>    let isPseudo = 1;
>    let isCodeGenOnly = 1;
>  
> @@ -908,18 +1035,23 @@ multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
>  class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>    VOP2Common <outs, ins, "", pattern>,
>    VOP <opName>,
> -  SIMCInstr<opName#"_e32", SISubtarget.NONE> {
> +  SIMCInstr<opName#"_e32", SISubtarget.NONE>,
> +  MnemonicAlias<opName#"_e32", opName> {
>    let isPseudo = 1;
>    let isCodeGenOnly = 1;
>  }
>  
>  class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
>    VOP2 <op.SI, outs, ins, opName#asm, []>,
> -  SIMCInstr <opName#"_e32", SISubtarget.SI>;
> +  SIMCInstr <opName#"_e32", SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +}
>  
>  class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
>    VOP2 <op.SI, outs, ins, opName#asm, []>,
> -  SIMCInstr <opName#"_e32", SISubtarget.VI>;
> +  SIMCInstr <opName#"_e32", SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +}
>  
>  multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
>                       string opName, string revOp> {
> @@ -965,7 +1097,8 @@ class VOP3DisableModFields <bit HasSrc0Mods,
>  class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>    VOP3Common <outs, ins, "", pattern>,
>    VOP <opName>,
> -  SIMCInstr<opName#"_e64", SISubtarget.NONE> {
> +  SIMCInstr<opName#"_e64", SISubtarget.NONE>,
> +  MnemonicAlias<opName#"_e64", opName> {
>    let isPseudo = 1;
>    let isCodeGenOnly = 1;
>  }
> @@ -973,22 +1106,30 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>  class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
>    VOP3Common <outs, ins, asm, []>,
>    VOP3e <op>,
> -  SIMCInstr<opName#"_e64", SISubtarget.SI>;
> +  SIMCInstr<opName#"_e64", SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +}
>  
>  class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
>    VOP3Common <outs, ins, asm, []>,
>    VOP3e_vi <op>,
> -  SIMCInstr <opName#"_e64", SISubtarget.VI>;
> +  SIMCInstr <opName#"_e64", SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +}
>  
>  class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
>    VOP3Common <outs, ins, asm, []>,
>    VOP3be <op>,
> -  SIMCInstr<opName#"_e64", SISubtarget.SI>;
> +  SIMCInstr<opName#"_e64", SISubtarget.SI> {
> +  let AssemblerPredicates = [isSICI];
> +}
>  
>  class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
>    VOP3Common <outs, ins, asm, []>,
>    VOP3be_vi <op>,
> -  SIMCInstr <opName#"_e64", SISubtarget.VI>;
> +  SIMCInstr <opName#"_e64", SISubtarget.VI> {
> +  let AssemblerPredicates = [isVI];
> +}
>  
>  multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
>                     string opName, int NumSrcArgs, bit HasMods = 1> {
> @@ -1129,12 +1270,16 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
>    }
>  
>    def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
> -            SIMCInstr <opName, SISubtarget.SI>;
> +            SIMCInstr <opName, SISubtarget.SI> {
> +            let AssemblerPredicates = [isSICI];
> +  }
>  
>    def _vi : VOP3Common <outs, ins, asm, []>,
>              VOP3e_vi <op.VI3>,
>              VOP3DisableFields <1, 0, 0>,
> -            SIMCInstr <opName, SISubtarget.VI>;
> +            SIMCInstr <opName, SISubtarget.VI> {
> +            let AssemblerPredicates = [isVI];
> +  }
>  }
>  
>  multiclass VOP1_Helper <vop1 op, string opName, dag outs,
> @@ -1287,7 +1432,8 @@ let isCodeGenOnly = 0 in {
>  class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>    VOPCCommon <ins, "", pattern>,
>    VOP <opName>,
> -  SIMCInstr<opName#"_e32", SISubtarget.NONE> {
> +  SIMCInstr<opName#"_e32", SISubtarget.NONE>,
> +  MnemonicAlias<opName#"_e32", opName> {
>    let isPseudo = 1;
>    let isCodeGenOnly = 1;
>  }
> @@ -1534,7 +1680,9 @@ class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
>  class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
>    DS <outs, ins, asm, []>,
>    DSe <op>,
> -  SIMCInstr <opName, SISubtarget.SI>;
> +  SIMCInstr <opName, SISubtarget.SI> {
> +  let isCodeGenOnly = 0;
> +}
>  
>  class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
>    DS <outs, ins, asm, []>,
> @@ -1548,6 +1696,7 @@ class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm
>    bits<16> offset;
>    let offset0 = offset{7-0};
>    let offset1 = offset{15-8};
> +  let isCodeGenOnly = 0;
>  }
>  
>  class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
> @@ -1575,12 +1724,12 @@ multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
>  multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
>    dag outs = (outs rc:$vdst),
>    dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
> -                 gds:$gds, M0Reg:$m0),
> +                 gds01:$gds, M0Reg:$m0),
>    string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
>  
>    def "" : DS_Pseudo <opName, outs, ins, []>;
>  
> -  let data0 = 0, data1 = 0 in {
> +  let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in {
>      def _si : DS_Real_si <op, opName, outs, ins, asm>;
>      def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
>    }
> @@ -1604,12 +1753,12 @@ multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
>  multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
>    dag outs = (outs),
>    dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
> -              ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0),
> +              ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds, M0Reg:$m0),
>    string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
>  
>    def "" : DS_Pseudo <opName, outs, ins, []>;
>  
> -  let vdst = 0 in {
> +  let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in {
>      def _si : DS_Real_si <op, opName, outs, ins, asm>;
>      def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
>    }
> @@ -1683,7 +1832,7 @@ multiclass DS_0A_RET <bits<8> op, string opName,
>  
>  multiclass DS_1A_RET_GDS <bits<8> op, string opName,
>    dag outs = (outs VGPR_32:$vdst),
> -  dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
> +  dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset, M0Reg:$m0),
>    string asm = opName#" $vdst, $addr"#"$offset gds"> {
>  
>    def "" : DS_Pseudo <opName, outs, ins, []>;
> @@ -1792,6 +1941,20 @@ class mubuf <bits<7> si, bits<7> vi = si> {
>    field bits<7> VI = vi;
>  }
>  
> +let isCodeGenOnly = 0 in {
> +
> +class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> +  MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
> +  let lds  = 0;
> +}
> +
> +} // End let isCodeGenOnly = 0
> +
> +class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> +  MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
> +  let lds = 0;
> +}
> +
>  class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
>    bit IsAddr64 = is_addr64;
>    string OpName = NAME # suffix;
> @@ -1835,7 +1998,7 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
>    def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
>             MUBUFAddr64Table <0>;
>  
> -  let addr64 = 0 in {
> +  let addr64 = 0, isCodeGenOnly = 0 in {
>      def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
>    }
>  
> @@ -1848,7 +2011,7 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
>    def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
>             MUBUFAddr64Table <1>;
>  
> -  let addr64 = 1 in {
> +  let addr64 = 1, isCodeGenOnly = 0 in {
>      def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
>    }
>  
> @@ -1856,11 +2019,6 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
>    // for VI appropriately.
>  }
>  
> -class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> -  MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
> -  let lds = 0;
> -}
> -
>  multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins,
>                                  string asm, list<dag> pattern, bit is_return> {
>  
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index d6e4986..7c74eb6 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -27,16 +27,8 @@ def SendMsgImm : Operand<i32> {
>  }
>  
>  def isGCN : Predicate<"Subtarget->getGeneration() "
> -                      ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
> -def isSICI : Predicate<
> -  "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
> -  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
> ->;
> -def isCI : Predicate<"Subtarget->getGeneration() "
> -                      ">= AMDGPUSubtarget::SEA_ISLANDS">;
> -def isVI : Predicate <
> -  "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS"
> ->;
> +                      ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
> +            AssemblerPredicate<"FeatureGCN">;
>  
>  def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
>  
> @@ -240,9 +232,9 @@ defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
>  >;
>  } // End Defs = [SCC]
>  
> -defm S_CSELECT_B32 : SOP2_SELECT_32 <sop2<0x0a>, "s_cselect_b32", []>;
>  
>  let Uses = [SCC] in {
> +  defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>;
>    defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>;
>  } // End Uses = [SCC]
>  
> @@ -1663,7 +1655,6 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
>    VOP_F32_F32_I32, AMDGPUldexp
>  >;
>  
> -
>  defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
>    VOP_I32_F32_I32>; // TODO: set "Uses = dst"
>  
> diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
> index 7bb5dc2..f289014 100644
> --- a/lib/Target/R600/SIRegisterInfo.td
> +++ b/lib/Target/R600/SIRegisterInfo.td
> @@ -66,7 +66,7 @@ foreach Index = 0-255 in {
>  //===----------------------------------------------------------------------===//
>  
>  // SGPR 32-bit registers
> -def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
> +def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
>                              (add (sequence "SGPR%u", 0, 101))>;
>  
>  // SGPR 64-bit registers
> @@ -113,7 +113,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
>                                 (add (decimate (shl SGPR_32, 15), 4))]>;
>  
>  // VGPR 32-bit registers
> -def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
> +def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
>                              (add (sequence "VGPR%u", 0, 255))>;
>  
>  // VGPR 64-bit registers
> @@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
>  //  Register classes used as source and destination
>  //===----------------------------------------------------------------------===//
>  
> +class RegImmMatcher<string name> : AsmOperandClass {
> +  let Name = name;
> +  let RenderMethod = "addRegOrImmOperands";
> +}
> +
>  // Special register classes for predicates and the M0 register
>  def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
>    let CopyCost = -1; // Theoretically it is possible to read from SCC,
> @@ -180,7 +185,7 @@ def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
>  def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
>  
>  // Register class for all scalar registers (SGPRs + Special Registers)
> -def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
> +def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
>    (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
>  >;
>  
> @@ -227,15 +232,21 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
>  //  SSrc_* Operands with an SGPR or a 32-bit immediate
>  //===----------------------------------------------------------------------===//
>  
> -def SSrc_32 : RegImmOperand<SReg_32>;
> +def SSrc_32 : RegImmOperand<SReg_32> {
> +  let ParserMatchClass = RegImmMatcher<"SSrc32">;
> +}
>  
> -def SSrc_64 : RegImmOperand<SReg_64>;
> +def SSrc_64 : RegImmOperand<SReg_64> {
> +  let ParserMatchClass = RegImmMatcher<"SSrc64">;
> +}
>  
>  //===----------------------------------------------------------------------===//
>  //  SCSrc_* Operands with an SGPR or a inline constant
>  //===----------------------------------------------------------------------===//
>  
> -def SCSrc_32 : RegInlineOperand<SReg_32>;
> +def SCSrc_32 : RegInlineOperand<SReg_32> {
> +  let ParserMatchClass = RegImmMatcher<"SCSrc32">;
> +}
>  
>  //===----------------------------------------------------------------------===//
>  //  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
> @@ -245,14 +256,30 @@ def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
>  
>  def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
>  
> -def VSrc_32 : RegImmOperand<VS_32>;
> +def VSrc_32 : RegisterOperand<VS_32> {
> +  let OperandNamespace = "AMDGPU";
> +  let OperandType = "OPERAND_REG_IMM32";
> +  let ParserMatchClass = RegImmMatcher<"VSrc32">;
> +}
>  
> -def VSrc_64 : RegImmOperand<VS_64>;
> +def VSrc_64 : RegisterOperand<VS_64> {
> +  let OperandNamespace = "AMDGPU";
> +  let OperandType = "OPERAND_REG_IMM32";
> +  let ParserMatchClass = RegImmMatcher<"VSrc64">;
> +}
>  
>  //===----------------------------------------------------------------------===//
>  //  VCSrc_* Operands with an SGPR, VGPR or an inline constant
>  //===----------------------------------------------------------------------===//
>  
> -def VCSrc_32 : RegInlineOperand<VS_32>;
> +def VCSrc_32 : RegisterOperand<VS_32> {
> +  let OperandNamespace = "AMDGPU";
> +  let OperandType = "OPERAND_REG_INLINE_C";
> +  let ParserMatchClass = RegImmMatcher<"VCSrc32">;
> +}
>  
> -def VCSrc_64 : RegInlineOperand<VS_64>;
> +def VCSrc_64 : RegisterOperand<VS_64> {
> +  let OperandNamespace = "AMDGPU";
> +  let OperandType = "OPERAND_REG_INLINE_C";
> +  let ParserMatchClass = RegImmMatcher<"VCSrc64">;
> +}
> diff --git a/test/MC/R600/ds-err.s b/test/MC/R600/ds-err.s
> new file mode 100644
> index 0000000..52c2740
> --- /dev/null
> +++ b/test/MC/R600/ds-err.s
> @@ -0,0 +1,23 @@
> +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
> +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
> +
> +// offset too big
> +// CHECK: invalid operand for instruction
> +ds_add_u32 v2, v4 offset:1000000000
> +
> +// offset0 twice
> +// CHECK:  error: not a valid operand.
> +ds_write2_b32 v2, v4, v6 offset0:4 offset0:8
> +
> +// offset1 twice
> +// CHECK:  error: not a valid operand.
> +ds_write2_b32 v2, v4, v6 offset1:4 offset1:8
> +
> +// offset0 too big
> +// CHECK: invalid operand for instruction
> +ds_write2_b32 v2, v4, v6 offset0:1000000000
> +
> +// offset1 too big
> +// CHECK: invalid operand for instruction
> +ds_write2_b32 v2, v4, v6 offset1:1000000000
> +
> diff --git a/test/MC/R600/ds.s b/test/MC/R600/ds.s
> new file mode 100644
> index 0000000..ad63229
> --- /dev/null
> +++ b/test/MC/R600/ds.s
> @@ -0,0 +1,337 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI  -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Checks for 16-bit Offsets
> +//===----------------------------------------------------------------------===//
> +
> +ds_add_u32 v2, v4 offset:16
> +// CHECK: ds_add_u32 v2, v4 offset:16 ; encoding: [0x10,0x00,0x00,0xd8,0x02,0x04,0x00,0x00]
> +
> +//===----------------------------------------------------------------------===//
> +// Checks for 2 8-bit Offsets
> +//===----------------------------------------------------------------------===//
> +
> +ds_write2_b32 v2, v4, v6 offset0:4
> +// CHECK: ds_write2_b32 v2, v4, v6 offset0:4 ; encoding: [0x04,0x00,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write2_b32 v2, v4, v6 offset0:4 offset1:8
> +// CHECK: ds_write2_b32 v2, v4, v6 offset0:4 offset1:8 ; encoding: [0x04,0x08,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write2_b32 v2, v4, v6 offset1:8
> +// CHECK: ds_write2_b32 v2, v4, v6 offset1:8 ; encoding: [0x00,0x08,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_read2_b32 v[8:9], v2 offset0:4
> +// CHECK: ds_read2_b32 v[8:9], v2 offset0:4 ; encoding: [0x04,0x00,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b32 v[8:9], v2 offset0:4 offset1:8
> +// CHECK: ds_read2_b32 v[8:9], v2 offset0:4 offset1:8 ; encoding: [0x04,0x08,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b32 v[8:9], v2 offset1:8
> +// CHECK: ds_read2_b32 v[8:9], v2 offset1:8 ; encoding: [0x00,0x08,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +ds_add_u32 v2, v4
> +// CHECK: ds_add_u32 v2, v4 ; encoding: [0x00,0x00,0x00,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_sub_u32 v2, v4
> +// CHECK: ds_sub_u32 v2, v4 ; encoding: [0x00,0x00,0x04,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_rsub_u32 v2, v4
> +// CHECK: ds_rsub_u32 v2, v4 ; encoding: [0x00,0x00,0x08,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_inc_u32 v2, v4
> +// CHECK: ds_inc_u32 v2, v4 ; encoding: [0x00,0x00,0x0c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_dec_u32 v2, v4
> +// CHECK: ds_dec_u32 v2, v4 ; encoding: [0x00,0x00,0x10,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_min_i32 v2, v4
> +// CHECK: ds_min_i32 v2, v4 ; encoding: [0x00,0x00,0x14,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_max_i32 v2, v4
> +// CHECK: ds_max_i32 v2, v4 ; encoding: [0x00,0x00,0x18,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_min_u32 v2, v4
> +// CHECK: ds_min_u32 v2, v4 ; encoding: [0x00,0x00,0x1c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_max_u32 v2, v4
> +// CHECK: ds_max_u32 v2, v4 ; encoding: [0x00,0x00,0x20,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_and_b32 v2, v4
> +// CHECK: ds_and_b32 v2, v4 ; encoding: [0x00,0x00,0x24,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_or_b32 v2, v4
> +// CHECK: ds_or_b32 v2, v4 ; encoding: [0x00,0x00,0x28,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_xor_b32 v2, v4
> +// CHECK: ds_xor_b32 v2, v4 ; encoding: [0x00,0x00,0x2c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_mskor_b32 v2, v4, v6
> +// CHECK: ds_mskor_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x30,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write_b32 v2, v4
> +// CHECK: ds_write_b32 v2, v4 ; encoding: [0x00,0x00,0x34,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_write2_b32 v2, v4, v6
> +// CHECK: ds_write2_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write2st64_b32 v2, v4, v6
> +// CHECK: ds_write2st64_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x3c,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_b32 v2, v4, v6
> +// CHECK: ds_cmpst_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x40,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_f32 v2, v4, v6
> +// CHECK: ds_cmpst_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x44,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_min_f32 v2, v4, v6
> +// CHECK: ds_min_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x48,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_max_f32 v2, v4, v6
> +// CHECK: ds_max_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x4c,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_gws_init v2 gds
> +// CHECK: ds_gws_init v2 gds ; encoding: [0x00,0x00,0x66,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_sema_v v2 gds
> +// CHECK: ds_gws_sema_v v2 gds ; encoding: [0x00,0x00,0x6a,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_sema_br v2 gds
> +// CHECK: ds_gws_sema_br v2 gds ; encoding: [0x00,0x00,0x6e,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_sema_p v2 gds
> +// CHECK: ds_gws_sema_p v2 gds ; encoding: [0x00,0x00,0x72,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_barrier v2 gds
> +// CHECK: ds_gws_barrier v2 gds ; encoding: [0x00,0x00,0x76,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_write_b8 v2, v4
> +// CHECK: ds_write_b8 v2, v4 ; encoding: [0x00,0x00,0x78,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_write_b16 v2, v4
> +// CHECK: ds_write_b16 v2, v4 ; encoding: [0x00,0x00,0x7c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_add_rtn_u32 v8, v2, v4
> +// CHECK: ds_add_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x80,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_sub_rtn_u32 v8, v2, v4
> +// CHECK: ds_sub_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x84,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_rsub_rtn_u32 v8, v2, v4
> +// CHECK: ds_rsub_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x88,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_inc_rtn_u32 v8, v2, v4
> +// CHECK: ds_inc_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x8c,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_dec_rtn_u32 v8, v2, v4
> +// CHECK: ds_dec_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x90,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_i32 v8, v2, v4
> +// CHECK: ds_min_rtn_i32 v8, v2, v4 ; encoding: [0x00,0x00,0x94,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_i32 v8, v2, v4
> +// CHECK: ds_max_rtn_i32 v8, v2, v4 ; encoding: [0x00,0x00,0x98,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_u32 v8, v2, v4
> +// CHECK: ds_min_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x9c,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_u32 v8, v2, v4
> +// CHECK: ds_max_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0xa0,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_and_rtn_b32 v8, v2, v4
> +// CHECK: ds_and_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xa4,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_or_rtn_b32 v8, v2, v4
> +// CHECK: ds_or_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xa8,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_xor_rtn_b32 v8, v2, v4
> +// CHECK: ds_xor_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xac,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_mskor_rtn_b32 v8, v2, v4, v6
> +// CHECK: ds_mskor_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xb0,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg_rtn_b32 v8, v2, v4
> +// CHECK: ds_wrxchg_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xb4,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6
> +// CHECK: ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xb8,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg2st64_rtn_b32 v[8:9] v2, v4, v6
> +// CHECK: ds_wrxchg2st64_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xbc,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_b32 v8, v2, v4, v6
> +// CHECK: ds_cmpst_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc0,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_f32 v8, v2, v4, v6
> +// CHECK: ds_cmpst_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc4,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_min_rtn_f32 v8, v2, v4, v6
> +// CHECK: ds_min_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc8,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_max_rtn_f32 v8, v2, v4, v6
> +// CHECK: ds_max_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xcc,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_swizzle_b32 v8, v2
> +// CHECK: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_b32 v8, v2
> +// CHECK: ds_read_b32 v8, v2 ; encoding: [0x00,0x00,0xd8,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b32 v[8:9], v2
> +// CHECK: ds_read2_b32 v[8:9], v2 ; encoding: [0x00,0x00,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2st64_b32 v[8:9], v2
> +// CHECK: ds_read2st64_b32 v[8:9], v2 ; encoding: [0x00,0x00,0xe0,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_i8 v8, v2
> +// CHECK: ds_read_i8 v8, v2 ; encoding: [0x00,0x00,0xe4,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_u8 v8, v2
> +// CHECK: ds_read_u8 v8, v2 ; encoding: [0x00,0x00,0xe8,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_i16 v8, v2
> +// CHECK: ds_read_i16 v8, v2 ; encoding: [0x00,0x00,0xec,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_u16 v8, v2
> +// CHECK: ds_read_u16 v8, v2 ; encoding: [0x00,0x00,0xf0,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_consume v8
> +// CHECK: ds_consume v8 ; encoding: [0x00,0x00,0xf4,0xd8,0x00,0x00,0x00,0x08]
> +
> +ds_append v8
> +// CHECK: ds_append v8 ; encoding: [0x00,0x00,0xf8,0xd8,0x00,0x00,0x00,0x08]
> +
> +ds_ordered_count v8, v2 gds
> +// CHECK: ds_ordered_count v8, v2 gds ; encoding: [0x00,0x00,0xfe,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_add_u64 v2, v[4:5]
> +// CHECK: ds_add_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x00,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_sub_u64 v2, v[4:5]
> +// CHECK: ds_sub_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x04,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_rsub_u64 v2, v[4:5]
> +// CHECK: ds_rsub_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x08,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_inc_u64 v2, v[4:5]
> +// CHECK: ds_inc_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x0c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_dec_u64 v2, v[4:5]
> +// CHECK: ds_dec_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x10,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_min_i64 v2, v[4:5]
> +// CHECK: ds_min_i64 v2, v[4:5] ; encoding: [0x00,0x00,0x14,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_max_i64 v2, v[4:5]
> +// CHECK: ds_max_i64 v2, v[4:5] ; encoding: [0x00,0x00,0x18,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_min_u64 v2, v[4:5]
> +// CHECK: ds_min_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x1c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_max_u64 v2, v[4:5]
> +// CHECK: ds_max_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x20,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_and_b64 v2, v[4:5]
> +// CHECK: ds_and_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x24,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_or_b64 v2, v[4:5]
> +// CHECK: ds_or_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x28,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_xor_b64 v2, v[4:5]
> +// CHECK: ds_xor_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x2c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_mskor_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_mskor_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x30,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_write_b64 v2, v[4:5]
> +// CHECK: ds_write_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x34,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_write2_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_write2_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x38,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_write2st64_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_write2st64_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x3c,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x40,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_f64 v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_f64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x44,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_min_f64 v2, v[4:5]
> +// CHECK: ds_min_f64 v2, v[4:5] ; encoding: [0x00,0x00,0x48,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_max_f64 v2, v[4:5]
> +// CHECK: ds_max_f64 v2, v[4:5] ; encoding: [0x00,0x00,0x4c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_add_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_add_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x80,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_sub_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_sub_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x84,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_rsub_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_rsub_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x88,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_inc_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_inc_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x8c,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_dec_rtn_u64 v[8:9] v2, v[4:5]
> +// CHECK: ds_dec_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x90,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_i64 v[8:9], v2, v[4:5]
> +// CHECK: ds_min_rtn_i64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x94,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_i64 v[8:9], v2, v[4:5]
> +// CHECK: ds_max_rtn_i64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x98,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_min_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x9c,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_max_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa0,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_and_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_and_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa4,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_or_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_or_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa8,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_xor_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_xor_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xac,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_mskor_rtn_b64 v[8:9], v2, v[4:5], v[6:7]
> +// CHECK: ds_mskor_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xb0,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_wrxchg_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xb4,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
> +// CHECK: ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xb8,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
> +// CHECK: ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xbc,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xc0,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_f64 v[8:9], v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_rtn_f64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xc4,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_min_rtn_f64 v[8:9], v2, v[4:5]
> +// CHECK: ds_min_rtn_f64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xc8,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_f64 v[8:9], v2, v[4:5]
> +// CHECK: ds_max_rtn_f64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xcc,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_read_b64 v[8:9], v2
> +// CHECK: ds_read_b64 v[8:9], v2 ; encoding: [0x00,0x00,0xd8,0xd9,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b64 v[8:11], v2
> +// CHECK: ds_read2_b64 v[8:11], v2 ; encoding: [0x00,0x00,0xdc,0xd9,0x02,0x00,0x00,0x08]
> +
> +ds_read2st64_b64 v[8:11], v2
> +// CHECK: ds_read2st64_b64 v[8:11], v2 ; encoding: [0x00,0x00,0xe0,0xd9,0x02,0x00,0x00,0x08]
> diff --git a/test/MC/R600/mubuf.s b/test/MC/R600/mubuf.s
> new file mode 100644
> index 0000000..78d365a
> --- /dev/null
> +++ b/test/MC/R600/mubuf.s
> @@ -0,0 +1,352 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Test for different operand combinations
> +//===----------------------------------------------------------------------===//
> +
> +//===----------------------------------------------------------------------===//
> +// load - immediate offset only
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, s[4:7], s1
> +// CHECK: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 glc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 slc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 tfe
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 tfe glc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x30,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - vgpr offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 tfe
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen tfe glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - vgpr index
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 tfe
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen tfe glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - vgpr index and offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen tfe glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - addr64
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 tfe glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - immediate offset only
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, s[4:7], s1
> +// CHECK: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 glc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 slc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 tfe
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 tfe glc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - vgpr offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 tfe
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen tfe glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - vgpr index
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 tfe
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen tfe glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - vgpr index and offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen tfe glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - addr64
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 tfe glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_format_x v1, s[4:7], s1
> +// CHECK: buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_format_xy v[1:2], s[4:7], s1
> +// CHECK: buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_format_xyz v[1:3], s[4:7], s1
> +// CHECK: buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_format_xyzw v[1:4], s[4:7], s1
> +// CHECK: buffer_load_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_x v1, s[4:7], s1
> +// CHECK: buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_xy v[1:2], s[4:7], s1
> +// CHECK: buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_xyz v[1:3], s[4:7], s1
> +// CHECK: buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_xyzw v[1:4], s[4:7], s1
> +// CHECK: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_ubyte v1, s[4:7], s1
> +// CHECK: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_sbyte v1, s[4:7], s1
> +// CHECK: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_ushort v1, s[4:7], s1
> +// CHECK: buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_sshort v1, s[4:7], s1
> +// CHECK: buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1
> +// CHECK: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dwordx2 v[1:2], s[4:7], s1
> +// CHECK: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dwordx4 v[1:4], s[4:7], s1
> +// CHECK: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_byte v1, s[4:7], s1
> +// CHECK: buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_short v1, s[4:7], s1
> +// CHECK: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1 s[4:7], s1
> +// CHECK: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dwordx2 v[1:2], s[4:7], s1
> +// CHECK: buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dwordx4 v[1:4], s[4:7], s1
> +// CHECK: buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
> +
> +// TODO: Atomics
> diff --git a/test/MC/R600/smrd.s b/test/MC/R600/smrd.s
> new file mode 100644
> index 0000000..b67abf7
> --- /dev/null
> +++ b/test/MC/R600/smrd.s
> @@ -0,0 +1,32 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +s_load_dword s1, s[2:3], 1
> +// CHECK: s_load_dword s1, s[2:3], 0x1 ; encoding: [0x01,0x83,0x00,0xc0]
> +
> +s_load_dword s1, s[2:3], s4
> +// CHECK: s_load_dword s1, s[2:3], s4 ; encoding: [0x04,0x82,0x00,0xc0]
> +
> +s_load_dwordx2 s[2:3], s[2:3], 1
> +// CHECK: s_load_dwordx2 s[2:3], s[2:3], 0x1 ; encoding: [0x01,0x03,0x41,0xc0]
> +
> +s_load_dwordx2 s[2:3], s[2:3], s4
> +// CHECK: s_load_dwordx2 s[2:3], s[2:3], s4 ; encoding: [0x04,0x02,0x41,0xc0]
> +
> +s_load_dwordx4 s[4:7], s[2:3], 1
> +// CHECK: s_load_dwordx4 s[4:7], s[2:3], 0x1 ; encoding: [0x01,0x03,0x82,0xc0]
> +
> +s_load_dwordx4 s[4:7], s[2:3], s4
> +// CHECK: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x04,0x02,0x82,0xc0]
> +
> +s_load_dwordx8 s[8:15], s[2:3], 1
> +// CHECK: s_load_dwordx8 s[8:15], s[2:3], 0x1 ; encoding: [0x01,0x03,0xc4,0xc0]
> +
> +s_load_dwordx8 s[8:15], s[2:3], s4
> +// CHECK: s_load_dwordx8 s[8:15], s[2:3], s4 ; encoding: [0x04,0x02,0xc4,0xc0]
> +
> +s_load_dwordx16 s[16:31], s[2:3], 1
> +// CHECK: s_load_dwordx16 s[16:31], s[2:3], 0x1 ; encoding: [0x01,0x03,0x08,0xc1]
> +
> +s_load_dwordx16 s[16:31], s[2:3], s4
> +// CHECK: s_load_dwordx16 s[16:31], s[2:3], s4 ; encoding: [0x04,0x02,0x08,0xc1]
> diff --git a/test/MC/R600/sop1-err.s b/test/MC/R600/sop1-err.s
> new file mode 100644
> index 0000000..f892356
> --- /dev/null
> +++ b/test/MC/R600/sop1-err.s
> @@ -0,0 +1,37 @@
> +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
> +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
> +
> +s_mov_b32 v1, s2
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s1, v0
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s[1:2], s0
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s0, s[1:2]
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s220, s0
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s0, s220
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b64 s1, s[0:1]
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b64 s[0:1], s1
> +// CHECK: error: invalid operand for instruction
> +
> +// Immediate greater than 32-bits
> +s_mov_b32 s1, 0xfffffffff
> +// CHECK: error: invalid immediate: only 32-bit values are legal
> +
> +// Immediate greater than 32-bits
> +s_mov_b64 s[0:1], 0xfffffffff
> +// CHECK: error: invalid immediate: only 32-bit values are legal
> +
> +// Out of range register
> +s_mov_b32 s
> diff --git a/test/MC/R600/sop1.s b/test/MC/R600/sop1.s
> new file mode 100644
> index 0000000..92ca73f
> --- /dev/null
> +++ b/test/MC/R600/sop1.s
> @@ -0,0 +1,177 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +s_mov_b32 s1, s2
> +// CHECK: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe]
> +
> +s_mov_b32 s1, 1
> +// CHECK: s_mov_b32 s1, 1 ; encoding: [0x81,0x03,0x81,0xbe]
> +
> +s_mov_b32 s1, 100
> +// CHECK: s_mov_b32 s1, 0x64 ; encoding: [0xff,0x03,0x81,0xbe,0x64,0x00,0x00,0x00]
> +
> +s_mov_b64 s[2:3], s[4:5]
> +// CHECK: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe]
> +
> +s_mov_b64 s[2:3], 0xffffffffffffffff
> +// CHECK: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe]
> +
> +s_cmov_b32 s1, 200
> +// CHECK: s_cmov_b32 s1, 0xc8 ; encoding: [0xff,0x05,0x81,0xbe,0xc8,0x00,0x00,0x00]
> +
> +s_cmov_b32 s1, 1.0
> +// CHECK: s_cmov_b32 s1, 1.0 ; encoding: [0xf2,0x05,0x81,0xbe]
> +
> +//s_cmov_b64 s[2:3], 1.0
> +//CHECK-FIXME: s_cmov_b64 s[2:3], 1.0 ; encoding: [0xf2,0x05,0x82,0xb3]
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +s_mov_b32 s1, s2
> +// CHECK: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe]
> +
> +s_mov_b64 s[2:3], s[4:5]
> +// CHECK: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe]
> +
> +s_cmov_b32 s1, s2
> +// CHECK: s_cmov_b32 s1, s2 ; encoding: [0x02,0x05,0x81,0xbe]
> +
> +s_cmov_b64 s[2:3], s[4:5]
> +// CHECK: s_cmov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x06,0x82,0xbe]
> +
> +s_not_b32 s1, s2
> +// CHECK: s_not_b32 s1, s2 ; encoding: [0x02,0x07,0x81,0xbe]
> +
> +s_not_b64 s[2:3], s[4:5]
> +// CHECK: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x08,0x82,0xbe]
> +
> +s_wqm_b32 s1, s2
> +// CHECK: s_wqm_b32 s1, s2 ; encoding: [0x02,0x09,0x81,0xbe]
> +
> +s_wqm_b64 s[2:3], s[4:5]
> +// CHECK: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0a,0x82,0xbe]
> +
> +s_brev_b32 s1, s2
> +// CHECK: s_brev_b32 s1, s2 ; encoding: [0x02,0x0b,0x81,0xbe]
> +
> +s_brev_b64 s[2:3], s[4:5]
> +// CHECK: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0c,0x82,0xbe]
> +
> +s_bcnt0_i32_b32 s1, s2
> +// CHECK: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0d,0x81,0xbe]
> +
> +s_bcnt0_i32_b64 s1, s[2:3]
> +// CHECK: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0e,0x81,0xbe]
> +
> +s_bcnt1_i32_b32 s1, s2
> +// CHECK: s_bcnt1_i32_b32 s1, s2 ; encoding: [0x02,0x0f,0x81,0xbe]
> +
> +s_bcnt1_i32_b64 s1, s[2:3]
> +// CHECK: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x10,0x81,0xbe]
> +
> +s_ff0_i32_b32 s1, s2
> +// CHECK: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x11,0x81,0xbe]
> +
> +s_ff0_i32_b64 s1, s[2:3]
> +// CHECK: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x12,0x81,0xbe]
> +
> +s_ff1_i32_b32 s1, s2
> +// CHECK: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x13,0x81,0xbe]
> +
> +s_ff1_i32_b64 s1, s[2:3]
> +// CHECK: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x14,0x81,0xbe]
> +
> +s_flbit_i32_b32 s1, s2
> +// CHECK: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x15,0x81,0xbe]
> +
> +s_flbit_i32_b64 s1, s[2:3]
> +// CHECK: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x16,0x81,0xbe]
> +
> +s_flbit_i32 s1, s2
> +// CHECK: s_flbit_i32 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe]
> +
> +s_flbit_i32_i64 s1, s[2:3]
> +// CHECK: s_flbit_i32_i64 s1, s[2:3] ; encoding: [0x02,0x18,0x81,0xbe]
> +
> +s_sext_i32_i8 s1, s2
> +// CHECK: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x19,0x81,0xbe]
> +
> +s_sext_i32_i16 s1, s2
> +// CHECK: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe]
> +
> +s_bitset0_b32 s1, s2
> +// CHECK: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x1b,0x81,0xbe]
> +
> +s_bitset0_b64 s[2:3], s[4:5]
> +// CHECK: s_bitset0_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1c,0x82,0xbe]
> +
> +s_bitset1_b32 s1, s2
> +// CHECK: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1d,0x81,0xbe]
> +
> +s_bitset1_b64 s[2:3], s[4:5]
> +// CHECK: s_bitset1_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1e,0x82,0xbe]
> +
> +s_getpc_b64 s[2:3]
> +// CHECK: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1f,0x82,0xbe]
> +
> +s_setpc_b64 s[2:3], s[4:5]
> +// CHECK: s_setpc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x20,0x82,0xbe]
> +
> +s_swappc_b64 s[2:3], s[4:5]
> +// CHECK: s_swappc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x21,0x82,0xbe]
> +
> +s_rfe_b64 s[2:3], s[4:5]
> +// CHECK: s_rfe_b64 s[2:3], s[4:5] ; encoding: [0x04,0x22,0x82,0xbe]
> +
> +s_and_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe]
> +
> +s_or_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe]
> +
> +s_xor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe]
> +
> +s_andn2_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe]
> +
> +s_orn2_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x28,0x82,0xbe]
> +
> +s_nand_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe]
> +
> +s_nor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2a,0x82,0xbe]
> +
> +s_xnor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe]
> +
> +s_quadmask_b32 s1, s2
> +// CHECK: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe]
> +
> +s_quadmask_b64 s[2:3], s[4:5]
> +// CHECK: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe]
> +
> +s_movrels_b32 s1, s2
> +// CHECK: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2e,0x81,0xbe]
> +
> +s_movrels_b64 s[2:3], s[4:5]
> +// CHECK: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2f,0x82,0xbe]
> +
> +s_movreld_b32 s1, s2
> +// CHECK: s_movreld_b32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe]
> +
> +s_movreld_b64 s[2:3], s[4:5]
> +// CHECK: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x31,0x82,0xbe]
> +
> +s_cbranch_join s[4:5]
> +// CHECK: s_cbranch_join s[4:5] ; encoding: [0x04,0x32,0x80,0xbe]
> +
> +s_abs_i32 s1, s2
> +// CHECK: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe]
> +
> +s_mov_fed_b32 s1, s2
> +// CHECK: s_mov_fed_b32 s1, s2 ; encoding: [0x02,0x35,0x81,0xbe]
> diff --git a/test/MC/R600/sop2.s b/test/MC/R600/sop2.s
> new file mode 100644
> index 0000000..9a7a1c0
> --- /dev/null
> +++ b/test/MC/R600/sop2.s
> @@ -0,0 +1,131 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +// CHECK: s_add_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x80]
> +s_add_u32 s1, s2, s3
> +
> +// CHECK: s_sub_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x80]
> +s_sub_u32 s1, s2, s3
> +
> +// CHECK: s_add_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x81]
> +s_add_i32 s1, s2, s3
> +
> +// CHECK: s_sub_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x81]
> +s_sub_i32 s1, s2, s3
> +
> +// CHECK: s_addc_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x82]
> +s_addc_u32 s1, s2, s3
> +
> +// CHECK: s_subb_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x82]
> +s_subb_u32 s1, s2, s3
> +
> +// CHECK: s_min_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x83]
> +s_min_i32 s1, s2, s3
> +
> +// CHECK: s_min_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x83]
> +s_min_u32 s1, s2, s3
> +
> +// CHECK: s_max_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x84]
> +s_max_i32 s1, s2, s3
> +
> +// CHECK: s_max_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x84]
> +s_max_u32 s1, s2, s3
> +
> +// CHECK: s_cselect_b32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x85]
> +s_cselect_b32 s1, s2, s3
> +
> +// CHECK: s_cselect_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x85]
> +s_cselect_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_and_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x87]
> +s_and_b32 s2, s4, s6
> +
> +// CHECK: s_and_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x87]
> +s_and_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_or_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x88]
> +s_or_b32 s2, s4, s6
> +
> +// CHECK: s_or_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x88]
> +s_or_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_xor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x89]
> +s_xor_b32 s2, s4, s6
> +
> +// CHECK: s_xor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x89]
> +s_xor_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_andn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8a]
> +s_andn2_b32 s2, s4, s6
> +
> +// CHECK: s_andn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8a]
> +s_andn2_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_orn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8b]
> +s_orn2_b32 s2, s4, s6
> +
> +// CHECK: s_orn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8b]
> +s_orn2_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_nand_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8c]
> +s_nand_b32 s2, s4, s6
> +
> +// CHECK: s_nand_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8c]
> +s_nand_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_nor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8d]
> +s_nor_b32 s2, s4, s6
> +
> +// CHECK: s_nor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8d]
> +s_nor_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_xnor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8e]
> +s_xnor_b32 s2, s4, s6
> +
> +// CHECK: s_xnor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8e]
> +s_xnor_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_lshl_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8f]
> +s_lshl_b32 s2, s4, s6
> +
> +// CHECK: s_lshl_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8f]
> +s_lshl_b64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_lshr_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x90]
> +s_lshr_b32 s2, s4, s6
> +
> +// CHECK: s_lshr_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x90]
> +s_lshr_b64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_ashr_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x91]
> +s_ashr_i32 s2, s4, s6
> +
> +// CHECK: s_ashr_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x91]
> +s_ashr_i64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_bfm_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x92]
> +s_bfm_b32 s2, s4, s6
> +
> +// CHECK: s_bfm_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x92]
> +s_bfm_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_mul_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x93]
> +s_mul_i32 s2, s4, s6
> +
> +// CHECK: s_bfe_u32 s2, s4, s6 ; encoding: [0x04,0x06,0x82,0x93]
> +s_bfe_u32 s2, s4, s6
> +
> +// CHECK: s_bfe_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x94]
> +s_bfe_i32 s2, s4, s6
> +
> +// CHECK: s_bfe_u64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x94]
> +s_bfe_u64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_bfe_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x02,0x95]
> +s_bfe_i64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_cbranch_g_fork s[4:5], s[6:7] ; encoding: [0x04,0x06,0x80,0x95]
> +s_cbranch_g_fork s[4:5], s[6:7]
> +
> +// CHECK: s_absdiff_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x96]
> +s_absdiff_i32 s2, s4, s6
> diff --git a/test/MC/R600/sopc.s b/test/MC/R600/sopc.s
> new file mode 100644
> index 0000000..0899c1a
> --- /dev/null
> +++ b/test/MC/R600/sopc.s
> @@ -0,0 +1,9 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +s_cmp_eq_i32 s1, s2
> +// CHECK: s_cmp_eq_i32 s1, s2 ; encoding: [0x01,0x02,0x00,0xbf]
> diff --git a/test/MC/R600/sopk.s b/test/MC/R600/sopk.s
> new file mode 100644
> index 0000000..6c27aac
> --- /dev/null
> +++ b/test/MC/R600/sopk.s
> @@ -0,0 +1,66 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +s_movk_i32 s2, 0x6
> +// CHECK: s_movk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb0]
> +
> +s_cmovk_i32 s2, 0x6
> +// CHECK: s_cmovk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb1]
> +
> +s_cmpk_eq_i32 s2, 0x6
> +// CHECK: s_cmpk_eq_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb1]
> +
> +s_cmpk_lg_i32 s2, 0x6
> +// CHECK: s_cmpk_lg_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb2]
> +
> +s_cmpk_gt_i32 s2, 0x6
> +// CHECK: s_cmpk_gt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb2]
> +
> +s_cmpk_ge_i32 s2, 0x6
> +// CHECK: s_cmpk_ge_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb3]
> +
> +s_cmpk_lt_i32 s2, 0x6
> +// CHECK: s_cmpk_lt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb3]
> +
> +s_cmpk_le_i32 s2, 0x6
> +// CHECK: s_cmpk_le_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb4]
> +
> +s_cmpk_eq_u32 s2, 0x6
> +// CHECK: s_cmpk_eq_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb4]
> +
> +s_cmpk_lg_u32 s2, 0x6
> +// CHECK: s_cmpk_lg_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb5]
> +
> +s_cmpk_gt_u32 s2, 0x6
> +// CHECK: s_cmpk_gt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb5]
> +
> +s_cmpk_ge_u32 s2, 0x6
> +// CHECK: s_cmpk_ge_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb6]
> +
> +s_cmpk_lt_u32 s2, 0x6
> +// CHECK: s_cmpk_lt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb6]
> +
> +s_cmpk_le_u32 s2, 0x6
> +// CHECK: s_cmpk_le_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb7]
> +
> +s_addk_i32 s2, 0x6
> +// CHECK: s_addk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb7]
> +
> +s_mulk_i32 s2, 0x6
> +// CHECK: s_mulk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb8]
> +
> +s_cbranch_i_fork s[2:3], 0x6
> +// CHECK: s_cbranch_i_fork s[2:3], 0x6 ; encoding: [0x06,0x00,0x82,0xb8]
> +
> +s_getreg_b32 s2, 0x6
> +// CHECK: s_getreg_b32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb9]
> +
> +s_setreg_b32 s2, 0x6
> +// CHECK: s_setreg_b32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb9]
> +
> +s_setreg_imm32_b32 0xff, 0x6
> +// CHECK: s_setreg_imm32_b32 0xff, 0x6 ; encoding: [0x06,0x00,0x80,0xba,0xff,0x00,0x00,0x00]
> diff --git a/test/MC/R600/sopp.s b/test/MC/R600/sopp.s
> index 0f186b1..b072c16 100644
> --- a/test/MC/R600/sopp.s
> +++ b/test/MC/R600/sopp.s
> @@ -1,4 +1,16 @@
> -// RUN: llvm-mc -arch=amdgcn -mcpu=SI  -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Edge Cases
> +//===----------------------------------------------------------------------===//
> +
> +s_nop 0       // CHECK: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf] 
> +s_nop 0xffff  // CHECK: s_nop 0xffff ; encoding: [0xff,0xff,0x80,0xbf]
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
>  
>    s_nop 1            // CHECK: s_nop 1 ; encoding: [0x01,0x00,0x80,0xbf]
>    s_endpgm           // CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
> diff --git a/test/MC/R600/vop1.s b/test/MC/R600/vop1.s
> new file mode 100644
> index 0000000..9c9a6b2
> --- /dev/null
> +++ b/test/MC/R600/vop1.s
> @@ -0,0 +1,182 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +// CHECK: v_nop ; encoding: [0x00,0x00,0x00,0x7e]
> +v_nop
> +
> +// CHECK: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e]
> +v_mov_b32 v1, v2
> +
> +// CHECK: v_readfirstlane_b32 s1, v2 ; encoding: [0x02,0x05,0x02,0x7e]
> +v_readfirstlane_b32 s1, v2
> +
> +// CHECK: v_cvt_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x07,0x02,0x7e]
> +v_cvt_i32_f64 v1, v[2:3]
> +
> +// CHECK: v_cvt_f64_i32_e32 v[1:2], v2 ; encoding: [0x02,0x09,0x02,0x7e]
> +v_cvt_f64_i32 v[1:2], v2
> +
> +// CHECK: v_cvt_f32_i32_e32 v1, v2 ; encoding: [0x02,0x0b,0x02,0x7e]
> +v_cvt_f32_i32 v1, v2
> +
> +// CHECK: v_cvt_f32_u32_e32 v1, v2 ; encoding: [0x02,0x0d,0x02,0x7e]
> +v_cvt_f32_u32 v1, v2
> +
> +// CHECK: v_cvt_u32_f32_e32 v1, v2 ; encoding: [0x02,0x0f,0x02,0x7e]
> +v_cvt_u32_f32 v1, v2
> +
> +// CHECK: v_cvt_i32_f32_e32 v1, v2 ; encoding: [0x02,0x11,0x02,0x7e]
> +v_cvt_i32_f32 v1, v2
> +
> +// CHECK: v_mov_fed_b32_e32 v1, v2 ; encoding: [0x02,0x13,0x02,0x7e]
> +v_mov_fed_b32 v1, v2
> +
> +// CHECK: v_cvt_f16_f32_e32 v1, v2 ; encoding: [0x02,0x15,0x02,0x7e]
> +v_cvt_f16_f32 v1, v2
> +
> +// CHECK: v_cvt_f32_f16_e32 v1, v2 ; encoding: [0x02,0x17,0x02,0x7e]
> +v_cvt_f32_f16 v1, v2
> +
> +// CHECK: v_cvt_rpi_i32_f32_e32 v1, v2 ; encoding: [0x02,0x19,0x02,0x7e]
> +v_cvt_rpi_i32_f32 v1, v2
> +
> +// CHECK: v_cvt_flr_i32_f32_e32 v1, v2 ; encoding: [0x02,0x1b,0x02,0x7e]
> +v_cvt_flr_i32_f32 v1, v2
> +
> +// CHECK: v_cvt_off_f32_i4_e32 v1, v2 ; encoding: [0x02,0x1d,0x02,0x7e]
> +v_cvt_off_f32_i4_e32 v1, v2
> +
> +// CHECK: v_cvt_f32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x1f,0x02,0x7e]
> +v_cvt_f32_f64 v1, v[2:3]
> +
> +// CHECK: v_cvt_f64_f32_e32 v[1:2], v2 ; encoding: [0x02,0x21,0x02,0x7e]
> +v_cvt_f64_f32 v[1:2], v2
> +
> +// CHECK: v_cvt_f32_ubyte0_e32 v1, v2 ; encoding: [0x02,0x23,0x02,0x7e]
> +v_cvt_f32_ubyte0 v1, v2
> +
> +// CHECK: v_cvt_f32_ubyte1_e32 v1, v2 ; encoding: [0x02,0x25,0x02,0x7e]
> +v_cvt_f32_ubyte1_e32 v1, v2
> +
> +// CHECK: v_cvt_f32_ubyte2_e32 v1, v2 ; encoding: [0x02,0x27,0x02,0x7e]
> +v_cvt_f32_ubyte2 v1, v2
> +
> +// CHECK: v_cvt_f32_ubyte3_e32 v1, v2 ; encoding: [0x02,0x29,0x02,0x7e]
> +v_cvt_f32_ubyte3 v1, v2
> +
> +// CHECK: v_cvt_u32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x2b,0x02,0x7e]
> +v_cvt_u32_f64 v1, v[2:3]
> +
> +// CHECK: v_cvt_f64_u32_e32 v[1:2], v2 ; encoding: [0x02,0x2d,0x02,0x7e]
> +v_cvt_f64_u32 v[1:2], v2
> +
> +// CHECK: v_fract_f32_e32 v1, v2 ; encoding: [0x02,0x41,0x02,0x7e]
> +v_fract_f32 v1, v2
> +
> +// CHECK: v_trunc_f32_e32 v1, v2 ; encoding: [0x02,0x43,0x02,0x7e]
> +v_trunc_f32 v1, v2
> +
> +// CHECK: v_ceil_f32_e32 v1, v2 ; encoding: [0x02,0x45,0x02,0x7e]
> +v_ceil_f32 v1, v2
> +
> +// CHECK: v_rndne_f32_e32 v1, v2 ; encoding: [0x02,0x47,0x02,0x7e]
> +v_rndne_f32 v1, v2
> +
> +// CHECK: v_floor_f32_e32 v1, v2 ; encoding: [0x02,0x49,0x02,0x7e]
> +v_floor_f32_e32 v1, v2
> +
> +// CHECK: v_exp_f32_e32 v1, v2 ; encoding: [0x02,0x4b,0x02,0x7e]
> +v_exp_f32 v1, v2
> +
> +// CHECK: v_log_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x4d,0x02,0x7e]
> +v_log_clamp_f32 v1, v2
> +
> +// CHECK: v_log_f32_e32 v1, v2 ; encoding: [0x02,0x4f,0x02,0x7e]
> +v_log_f32 v1, v2
> +
> +// CHECK: v_rcp_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x51,0x02,0x7e]
> +v_rcp_clamp_f32 v1, v2
> +
> +// CHECK: v_rcp_legacy_f32_e32 v1, v2 ; encoding: [0x02,0x53,0x02,0x7e]
> +v_rcp_legacy_f32 v1, v2
> +
> +// CHECK: v_rcp_f32_e32 v1, v2 ; encoding: [0x02,0x55,0x02,0x7e]
> +v_rcp_f32 v1, v2
> +
> +// CHECK: v_rcp_iflag_f32_e32 v1, v2 ; encoding: [0x02,0x57,0x02,0x7e]
> +v_rcp_iflag_f32 v1, v2
> +
> +// CHECK: v_rsq_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x59,0x02,0x7e]
> +v_rsq_clamp_f32 v1, v2
> +
> +// CHECK: v_rsq_legacy_f32_e32 v1, v2 ; encoding: [0x02,0x5b,0x02,0x7e]
> +v_rsq_legacy_f32 v1, v2
> +
> +// CHECK: v_rsq_f32_e32 v1, v2 ; encoding: [0x02,0x5d,0x02,0x7e]
> +v_rsq_f32_e32 v1, v2
> +
> +// CHECK: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x5f,0x02,0x7e]
> +v_rcp_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_rcp_clamp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x61,0x02,0x7e]
> +v_rcp_clamp_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_rsq_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x63,0x02,0x7e]
> +v_rsq_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_rsq_clamp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x65,0x02,0x7e]
> +v_rsq_clamp_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_sqrt_f32_e32 v1, v2 ; encoding: [0x02,0x67,0x02,0x7e]
> +v_sqrt_f32 v1, v2
> +
> +// CHECK: v_sqrt_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x69,0x02,0x7e]
> +v_sqrt_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_sin_f32_e32 v1, v2 ; encoding: [0x02,0x6b,0x02,0x7e]
> +v_sin_f32 v1, v2
> +
> +// CHECK: v_cos_f32_e32 v1, v2 ; encoding: [0x02,0x6d,0x02,0x7e]
> +v_cos_f32 v1, v2
> +
> +// CHECK: v_not_b32_e32 v1, v2 ; encoding: [0x02,0x6f,0x02,0x7e]
> +v_not_b32 v1, v2
> +
> +// CHECK: v_bfrev_b32_e32 v1, v2 ; encoding: [0x02,0x71,0x02,0x7e]
> +v_bfrev_b32 v1, v2
> +
> +// CHECK: v_ffbh_u32_e32 v1, v2 ; encoding: [0x02,0x73,0x02,0x7e]
> +v_ffbh_u32 v1, v2
> +
> +// CHECK: v_ffbl_b32_e32 v1, v2 ; encoding: [0x02,0x75,0x02,0x7e]
> +v_ffbl_b32 v1, v2
> +
> +// CHECK: v_ffbh_i32_e32 v1, v2 ; encoding: [0x02,0x77,0x02,0x7e]
> +v_ffbh_i32_e32 v1, v2
> +
> +// CHECK: v_frexp_exp_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x79,0x02,0x7e]
> +v_frexp_exp_i32_f64 v1, v[2:3]
> +
> +// CHECK: v_frexp_mant_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7b,0x02,0x7e]
> +v_frexp_mant_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_fract_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7d,0x02,0x7e]
> +v_fract_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_frexp_exp_i32_f32_e32 v1, v2 ; encoding: [0x02,0x7f,0x02,0x7e]
> +v_frexp_exp_i32_f32 v1, v2
> +
> +// CHECK: v_frexp_mant_f32_e32 v1, v2 ; encoding: [0x02,0x81,0x02,0x7e]
> +v_frexp_mant_f32 v1, v2
> +
> +// CHECK: v_clrexcp ; encoding: [0x00,0x82,0x00,0x7e]
> +v_clrexcp
> +
> +// CHECK: v_movreld_b32_e32 v1, v2 ; encoding: [0x02,0x85,0x02,0x7e]
> +v_movreld_b32 v1, v2
> +
> +// CHECK: v_movrels_b32_e32 v1, v2 ; encoding: [0x02,0x87,0x02,0x7e]
> +v_movrels_b32 v1, v2
> +
> +// CHECK: v_movrelsd_b32_e32 v1, v2 ; encoding: [0x02,0x89,0x02,0x7e]
> +v_movrelsd_b32 v1, v2
> diff --git a/test/MC/R600/vop2-err.s b/test/MC/R600/vop2-err.s
> new file mode 100644
> index 0000000..a113100
> --- /dev/null
> +++ b/test/MC/R600/vop2-err.s
> @@ -0,0 +1,35 @@
> +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
> +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Generic checks
> +//===----------------------------------------------------------------------===//
> +
> +v_mul_i32_i24 v1, v2, 100
> +// CHECK: error: invalid operand for instruction
> +
> +//===----------------------------------------------------------------------===//
> +// _e32 checks
> +//===----------------------------------------------------------------------===//
> +
> +// Immediate src1
> +v_mul_i32_i24_e32 v1, v2, 100
> +// CHECK: error: invalid operand for instruction
> +
> +// sgpr src1
> +v_mul_i32_i24_e32 v1, v2, s3
> +// CHECK: error: invalid operand for instruction
> +
> +//===----------------------------------------------------------------------===//
> +// _e64 checks
> +//===----------------------------------------------------------------------===//
> +
> +// Immediate src0
> +v_mul_i32_i24_e64 v1, 100, v3
> +// CHECK: error: invalid operand for instruction
> +
> +// Immediate src1
> +v_mul_i32_i24_e64 v1, v2, 100
> +// CHECK: error: invalid operand for instruction
> +
> +// TODO: Constant bus restrictions
> diff --git a/test/MC/R600/vop2.s b/test/MC/R600/vop2.s
> new file mode 100644
> index 0000000..6780088
> --- /dev/null
> +++ b/test/MC/R600/vop2.s
> @@ -0,0 +1,242 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Generic Checks for floating-point instructions (These have modifiers).
> +//===----------------------------------------------------------------------===//
> +
> +// TODO: 64-bit encoding of instructions with modifiers
> +
> +// _e32 suffix
> +// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
> +v_add_f32_e32 v1, v2, v3
> +
> +// src0 inline immediate
> +// CHECK: v_add_f32_e32 v1, 1.0, v3 ; encoding: [0xf2,0x06,0x02,0x06]
> +v_add_f32 v1, 1.0, v3
> +
> +// src0 negative inline immediate
> +// CHECK: v_add_f32_e32 v1, -1.0, v3 ; encoding: [0xf3,0x06,0x02,0x06]
> +v_add_f32 v1, -1.0, v3
> +
> +// src0 literal
> +// CHECK: v_add_f32_e32 v1, 0x42c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0x42]
> +v_add_f32 v1, 100.0, v3
> +
> +// src0 negative literal
> +// CHECK: v_add_f32_e32 v1, 0xc2c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0xc2]
> +v_add_f32 v1, -100.0, v3
> +
> +//===----------------------------------------------------------------------===//
> +// Generic Checks for integer instructions (These don't have modifiers).
> +//===----------------------------------------------------------------------===//
> +
> +// _e32 suffix
> +// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] 
> +v_mul_i32_i24_e32 v1, v2, v3
> +
> +// _e64 suffix
> +// CHECK: v_mul_i32_i24_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x02,0x00]
> +v_mul_i32_i24_e64 v1, v2, v3
> +
> +// src0 inline
> +// CHECK: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12]
> +v_mul_i32_i24 v1, 3, v3
> +
> +// src0 negative inline
> +// CHECK: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12]
> +v_mul_i32_i24 v1, -3, v3
> +
> +// src1 inline
> +// CHECK: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00]
> +v_mul_i32_i24 v1, v2, 3
> +
> +// src1 negative inline
> +// CHECK: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00]
> +v_mul_i32_i24 v1, v2, -3
> +
> +// src0 literal
> +// CHECK: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x12,0x64,0x00,0x00,0x00]
> +v_mul_i32_i24 v1, 100, v3
> +
> +// src1 negative literal
> +// CHECK: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x12,0x9c,0xff,0xff,0xff]
> +v_mul_i32_i24 v1, -100, v3
> +
> +//===----------------------------------------------------------------------===//
> +// Checks for legal operands
> +//===----------------------------------------------------------------------===//
> +
> +// src0 sgpr
> +// CHECK: v_mul_i32_i24_e32 v1, s2, v3 ; encoding: [0x02,0x06,0x02,0x12]
> +v_mul_i32_i24 v1, s2, v3
> +
> +// src1 sgpr
> +// CHECK: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x00,0x00]
> +v_mul_i32_i24 v1, v2, s3
> +
> +// src0, src1 same sgpr
> +// CHECK: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x04,0x00,0x00]
> +v_mul_i32_i24 v1, s2, s2
> +
> +// src0 sgpr, src1 inline
> +// CHECK: v_mul_i32_i24_e64 v1, s2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x06,0x01,0x00]
> +v_mul_i32_i24 v1, s2, 3
> +
> +// src0 inline src1 sgpr
> +// CHECK: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00]
> +v_mul_i32_i24 v1, 3, s3
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +// CHECK: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00]
> +v_cndmask_b32 v1, v2, v3
> +
> +// CHECK: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02]
> +v_readlane_b32 s1, v2, s3
> +
> +// CHECK: v_writelane_b32 v1, s2, s3 ; encoding: [0x02,0x06,0x02,0x04]
> +v_writelane_b32 v1, s2, s3
> +
> +// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
> +v_add_f32 v1, v2, v3
> +
> +// CHECK: v_sub_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x08]
> +v_sub_f32 v1, v2, v3
> +
> +// CHECK: v_subrev_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0a]
> +v_subrev_f32 v1, v2, v3
> +
> +// CHECK: v_mac_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c]
> +v_mac_legacy_f32 v1, v2, v3
> +
> +// CHECK: v_mul_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e]
> +v_mul_legacy_f32_e32 v1, v2, v3
> +
> +// CHECK: v_mul_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10]
> +v_mul_f32 v1, v2, v3
> +
> +// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
> +v_mul_i32_i24 v1, v2, v3
> +
> +// CHECK: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14]
> +v_mul_hi_i32_i24 v1, v2, v3
> +
> +// CHECK: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16]
> +v_mul_u32_u24 v1, v2, v3
> +
> +// CHECK: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18]
> +v_mul_hi_u32_u24 v1, v2, v3
> +
> +// CHECK: v_min_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a]
> +v_min_legacy_f32_e32 v1, v2, v3
> +
> +// CHECK: v_max_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c]
> +v_max_legacy_f32 v1, v2, v3
> +
> +// CHECK: v_min_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e]
> +v_min_f32_e32 v1, v2, v3
> +
> +// CHECK: v_max_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20]
> +v_max_f32 v1, v2, v3
> +
> +// CHECK: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22]
> +v_min_i32 v1, v2, v3
> +
> +// CHECK: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24]
> +v_max_i32 v1, v2, v3
> +
> +// CHECK: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26]
> +v_min_u32 v1, v2, v3
> +
> +// CHECK: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28]
> +v_max_u32 v1, v2, v3
> +
> +// CHECK: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a]
> +v_lshr_b32 v1, v2, v3
> +
> +// CHECK: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c]
> +v_lshrrev_b32 v1, v2, v3
> +
> +// CHECK: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e]
> +v_ashr_i32 v1, v2, v3
> +
> +// CHECK: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30]
> +v_ashrrev_i32 v1, v2, v3
> +
> +// CHECK: v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
> +v_lshl_b32_e32 v1, v2, v3
> +
> +// CHECK: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
> +v_lshlrev_b32 v1, v2, v3
> +
> +// CHECK: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
> +v_and_b32 v1, v2, v3
> +
> +// CHECK: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
> +v_or_b32 v1, v2, v3
> +
> +// CHECK: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
> +v_xor_b32 v1, v2, v3
> +
> +// CHECK: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
> +v_bfm_b32 v1, v2, v3
> +
> +// CHECK: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e]
> +v_mac_f32 v1, v2, v3
> +
> +// CHECK: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42]
> +v_madmk_f32 v1, v2, v3, 64.0
> +
> +// CHECK: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42]
> +v_madak_f32 v1, v2, v3, 64.0
> +
> +// CHECK: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44]
> +v_bcnt_u32_b32 v1, v2, v3
> +
> +// CHECK: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46]
> +v_mbcnt_lo_u32_b32 v1, v2, v3
> +
> +// CHECK: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48]
> +v_mbcnt_hi_u32_b32_e32 v1, v2, v3
> +
> +// CHECK: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
> +v_add_i32 v1, v2, v3
> +
> +// CHECK: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
> +v_sub_i32_e32 v1, v2, v3
> +
> +// CHECK: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
> +v_subrev_i32 v1, v2, v3
> +
> +// CHECK: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
> +v_addc_u32 v1, v2, v3
> +
> +// CHECK: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
> +v_subb_u32 v1, v2, v3
> +
> +// CHECK: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
> +v_subbrev_u32 v1, v2, v3
> +
> +// CHECK: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
> +v_ldexp_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58]
> +v_cvt_pkaccum_u8_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a]
> +v_cvt_pknorm_i16_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c]
> +v_cvt_pknorm_u16_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e]
> +v_cvt_pkrtz_f16_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60]
> +v_cvt_pk_u16_u32_e32 v1, v2, v3
> +
> +// CHECK: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62]
> +v_cvt_pk_i16_i32 v1, v2, v3
> diff --git a/test/MC/R600/vop3.s b/test/MC/R600/vop3.s
> new file mode 100644
> index 0000000..7d1ba0b
> --- /dev/null
> +++ b/test/MC/R600/vop3.s
> @@ -0,0 +1,138 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// VOPC Instructions
> +//===----------------------------------------------------------------------===//
> +
> +//
> +// Modifier tests:
> +//
> +
> +v_cmp_lt_f32 s[2:3] -v4, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x20] 
> +
> +v_cmp_lt_f32 s[2:3]  v4, -v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
> +
> +v_cmp_lt_f32 s[2:3] -v4, -v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x60]
> +
> +v_cmp_lt_f32 s[2:3] |v4|, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3] v4, |v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3] |v4|, |v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3] -|v4|, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x20]
> +
> +v_cmp_lt_f32 s[2:3] v4, -|v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x40]
> +
> +v_cmp_lt_f32 s[2:3] -|v4|, -|v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x60]
> +
> +//
> +// Instruction tests:
> +//
> +
> +v_cmp_f_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x00,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_eq_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x04,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_le_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x06,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_gt_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x08,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lg_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0a,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_ge_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0c,0xd0,0x04,0x0d,0x02,0x00]
> +
> +// TODO: Finish VOPC
> +
> +//===----------------------------------------------------------------------===//
> +// VOP1 Instructions
> +//===----------------------------------------------------------------------===//
> +
> +//
> +// Modifier tests:
> +// 
> +
> +v_fract_f32 v1, -v2
> +// CHECK: v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x20]
> +
> +v_fract_f32 v1, |v2|
> +// CHECK: v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x00]
> +
> +v_fract_f32 v1, -|v2|
> +// CHECK: v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x20]
> +
> +v_fract_f32 v1, v2 clamp
> +// CHECK: v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x00]
> +
> +v_fract_f32 v1, v2 mul:2
> +// CHECK: v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x08]
> +
> +v_fract_f32 v1, v2, div:2 clamp
> +// CHECK: v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x18]
> +
> +// TODO: Finish VOP1
> +
> +//===----------------------------------------------------------------------===//
> +// VOP2 Instructions
> +//===----------------------------------------------------------------------===//
> +
> +// TODO: Modifier tests
> +
> +v_cndmask_b32 v1, v3, v5, s[4:5]
> +// CHECK: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
> +
> +//TODO: readlane, writelane
> +
> +v_add_f32 v1, v3, s5
> +// CHECK: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_sub_f32 v1, v3, s5
> +// CHECK: v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x08,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_subrev_f32 v1, v3, s5
> +// CHECK: v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0a,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mac_legacy_f32 v1, v3, s5
> +// CHECK: v_mac_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mul_legacy_f32 v1, v3, s5
> +// CHECK: v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0e,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mul_f32 v1, v3, s5
> +// CHECK: v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x10,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mul_i32_i24 v1, v3, s5
> +// CHECK: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00]
> +
> +//===----------------------------------------------------------------------===//
> +// VOP3 Instructions
> +//===----------------------------------------------------------------------===//
> +
> +// TODO: Modifier tests
> +
> +v_mad_legacy_f32 v2, v4, v6, v8
> +// CHECK: v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0x80,0xd2,0x04,0x0d,0x22,0x04]
> +
> +
> +
> +
> +
> diff --git a/test/MC/R600/vopc.s b/test/MC/R600/vopc.s
> new file mode 100644
> index 0000000..f44919a
> --- /dev/null
> +++ b/test/MC/R600/vopc.s
> @@ -0,0 +1,40 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Generic Checks
> +//===----------------------------------------------------------------------===//
> +
> +// src0 sgpr
> +v_cmp_lt_f32 vcc, s2, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, s2, v4 ; encoding: [0x02,0x08,0x02,0x7c]
> +
> +// src0 inline immediate
> +v_cmp_lt_f32 vcc, 0, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, 0, v4 ; encoding: [0x80,0x08,0x02,0x7c]
> +
> +// src0 literal
> +v_cmp_lt_f32 vcc, 10.0, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, 0x41200000, v4 ; encoding: [0xff,0x08,0x02,0x7c,0x00,0x00,0x20,0x41]
> +
> +// src0, src1 max vgpr
> +v_cmp_lt_f32 vcc, v255, v255
> +// CHECK: v_cmp_lt_f32_e32 vcc, v255, v255 ; encoding: [0xff,0xff,0x03,0x7c]
> +
> +// force 32-bit encoding
> +v_cmp_lt_f32_e32 vcc, v2, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
> +
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +v_cmp_f_f32 vcc, v2, v4
> +// CHECK: v_cmp_f_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x00,0x7c]
> +
> +v_cmp_lt_f32 vcc, v2, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
> +
> +// TODO: Add tests for the rest of the instructions.
> +
> -- 
> 1.8.1.5
> 

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list