PATCH: R600/SI: Experimental assembler / inline assembly support
Tom Stellard
tom at stellard.net
Tue Apr 7 10:52:45 PDT 2015
Ping.
On Fri, Mar 20, 2015 at 05:41:58PM -0400, Tom Stellard wrote:
> Hi Matt,
>
> Here are updated patches that address the rest of your comments,
> except for the one about the float cast, because I wasn't sure how to
> fix it.
>
> -Tom
>
> On Fri, Mar 20, 2015 at 11:43:17AM -0400, Tom Stellard wrote:
> > Hi Matt,
> >
> > I'm working on an update patch. Here are some responses to your
> > comments:
> >
> > On Fri, Mar 13, 2015 at 10:47:38AM -0700, Matt Arsenault wrote:
> > > > + case Match_InvalidOperand: {
> > > > + SMLoc ErrorLoc = IDLoc;
> > > > + if (ErrorInfo != ~0ULL) {
> > > > + if (ErrorInfo >= Operands.size()) {
> > > > + return Error(IDLoc, "too few operands for instruction");
> > > > + }
> > > >
> > > > + ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
> > > Casting to a reference always looks weird, and there are a lot of these
> > > in this patch. Why do you need to do this? Can you not cast the pointer
> > > type before the deref and use -> for some weird reason?
> >
> > Operands is a vector of std::unique_ptr, so you can't cast it as a
> > regular pointer type. Every other assembler casts as reference
> > like this.
> >
> > > > @@ -195,17 +564,104 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
> > > >
> > > > // If we successfully parsed the operand or if there as an error parsing,
> > > > // we are done.
> > > > - if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
> > > > + //
> > > > + // If we are parsing after we reach EndOfStatement then this means we
> > > > + // are appending default values to the Operands list. This is only done
> > > > + // by custom parser, so we shouldn't continue on to the generic parsing.
> > > > + if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
> > > > + getLexer().is(AsmToken::EndOfStatement))
> > > > return ResTy;
> > > >
> > > > + bool Negate = false, Abs = false;
> > > > + if (getLexer().getKind()== AsmToken::Minus) {
> > > > + Parser.Lex();
> > > > + Negate = true;
> > > > + }
> > > > +
> > > > + if (getLexer().getKind() == AsmToken::Pipe) {
> > > > + Parser.Lex();
> > > > + Abs = true;
> > > > + }
> > > > +
> > > > switch(getLexer().getKind()) {
> > > > case AsmToken::Integer: {
> > > > + SMLoc S = Parser.getTok().getLoc();
> > > > + int64_t IntVal;
> > > > + if (getParser().parseAbsoluteExpression(IntVal))
> > > > + return MatchOperand_ParseFail;
> > > > + APInt IntVal32(32, IntVal);
> > > > + if (IntVal32.getSExtValue() != IntVal) {
> > > > + Error(S, "invalid immediate: only 32-bit values are legal");
> > > > + return MatchOperand_ParseFail;
> > > > + }
> > > > +
> > > > + IntVal = IntVal32.getSExtValue();
> > > > + if (Negate)
> > > > + IntVal *= -1;
> > > > + Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
> > > > + return MatchOperand_Success;
> > > > + }
> > > > + case AsmToken::Real: {
> > > > + // FIXME: We should emit an error if a double precisions floating-point
> > > > + // value is used. I'm not sure the best way to detect this.
> > > > + SMLoc S = Parser.getTok().getLoc();
> > > > int64_t IntVal;
> > > > if (getParser().parseAbsoluteExpression(IntVal))
> > > > return MatchOperand_ParseFail;
> > > > - Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
> > > > +
> > > > + APFloat F((float)APInt(64, IntVal).bitsToDouble());
> > > You should be able to avoid using the host float cast here
> >
> > What should I do instead?
> >
> > > > + if (Negate)
> > > > + F.changeSign();
> > > > + Operands.push_back(
> > > > + AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
> > > > return MatchOperand_Success;
> > > > }
> > > > + case AsmToken::Identifier: {
> > > > + SMLoc S, E;
> > > > + unsigned RegNo;
> > > > + if (!ParseRegister(RegNo, S, E)) {
> > > > +
> > > > + bool HasModifiers = operandsHaveModifiers(Operands);
> > > > + unsigned Modifiers = 0;
> > > > +
> > > > + if (Negate)
> > > > + Modifiers |= 0x1;
> > > > +
> > > > + if (Abs) {
> > > > + if (getLexer().getKind() != AsmToken::Pipe)
> > > > + return MatchOperand_ParseFail;
> > > > + Parser.Lex();
> > > > + Modifiers |= 0x2;
> > > > + }
> > > > +
> > > > + if (Modifiers && !HasModifiers) {
> > > > + // We are adding a modifier to src1 or src2 and previous sources
> > > > + // don't have modifiers, so we need to go back and empty modifers
> > > > + // for each previous source.
> > > > + for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1;
> > > > + --PrevRegIdx) {
> > > > +
> > > > + AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]);
> > > > + RegOp.setModifiers(0);
> > > > + }
> > > > + }
> > > > +
> > > > +
> > > > + Operands.push_back(AMDGPUOperand::CreateReg(
> > > > + RegNo, S, E, getContext().getRegisterInfo()));
> > > > +
> > > > + if (HasModifiers || Modifiers) {
> > > > + AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]);
> > > > + RegOp.setModifiers(Modifiers);
> > > > +
> > > > + }
> > > > + } else {
> > > > + Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(),
> > > > + S));
> > > > + Parser.Lex();
> > > > + }
> > > > + return MatchOperand_Success;
> > > > + }
> > > > default:
> > > > return MatchOperand_NoMatch;
> > > > }
> > > > +void AMDGPUAsmParser::cvtMubuf(MCInst &Inst,
> > > > + const OperandVector &Operands) {
> > > > + unsigned i = 1;
> > > > +
> > > > + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> > > > +
> > > > + for (unsigned e = Operands.size(); i != e; ++i) {
> > > This loop condition looks weird. You don't seem to be using the i after
> > > the loop, so its definition should move into the for
> > > > + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> > > > +
> > > > + // Add the register arguments
> > > > + if (Op.isReg()) {
> > > > + Op.addRegOperands(Inst, 1);
> > > > + continue;
> > > > + }
> > > > +
> > > > + // Handle the case where soffset is an immediate
> > > > + if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
> > > > + Op.addImmOperands(Inst, 1);
> > > > + continue;
> > > > + }
> > > > +
> > > > + // Handle tokens like 'offen' which are sometimes hard-coded into the
> > > > + // asm string. There are no MCInst operands for these.
> > > > + if (Op.isToken()) {
> > > > + continue;
> > > > + }
> > > > + assert(Op.isImm());
> > > > +
> > > > + // Handle optional arguments
> > > > + OptionalIdx[Op.getImmTy()] = i;
> > > > + }
> > > > +
> > > > + assert(OptionalIdx.size() == 4);
> > > > +
> > > > + unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
> > > > + unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC];
> > > > + unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC];
> > > > + unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE];
> > > > +
> > > > + ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1);
> > > > + ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1);
> > > > + ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1);
> > > > + ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1);
> > > Is this defaulting Offset/GLC/SLC/TFE to 1? Shouldn't these be 0?
> >
> > No, 1 is the number of operands that should be added to the instruction.
> > > > +}
> > > > +
> > > > +//===----------------------------------------------------------------------===//
> > > > +// SI Inline Assembly Support
> > > > +//===----------------------------------------------------------------------===//
> > > > +
> > > > +std::pair<unsigned, const TargetRegisterClass *>
> > > > +SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
> > > > + const std::string &Constraint,
> > > StringRef operand instead?
> >
> > It's a virtual function, so I don't think I can change it.
> >
> > > > + MVT VT) const {
> > > > + dbgs() << "Constraint = " << Constraint << "\n";
> > > > + dbgs() << "VT = " << EVT(VT).getEVTString() << "\n";
> > > Leftover debug printing
> > > > + if (Constraint == "r") {
> > > > + switch(VT.SimpleTy) {
> > > > + default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
> > > > + case MVT::i64:
> > > > + return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
> > > > + case MVT::i32:
> > > > + return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
> > > > + }
> > > > + }
> > > > +
> > > > + if (Constraint.size() > 1) {
> > > > + const TargetRegisterClass *RC = nullptr;
> > > > + if (Constraint[1] == 'v') {
> > > > + RC = &AMDGPU::VGPR_32RegClass;
> > > > + } else if (Constraint[1] == 's') {
> > > > + RC = &AMDGPU::SGPR_32RegClass;
> > > > + }
> > > > +
> > > > + if (RC) {
> > > > + unsigned Idx = std::atoi(Constraint.substr(2).c_str());
> > > > + if (Idx < RC->getNumRegs())
> > > > + return std::make_pair(RC->getRegister(Idx), RC);
> > > > + }
> > > > + }
> > > > + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
> > > > +}
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> From fa9638c2f13d1512c1a72fdd242f92e958a570be Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Fri, 20 Mar 2015 13:54:28 -0400
> Subject: [PATCH 1/3] R600/SI: Don't print offset0/offset1 DS operands when
> they are 0
>
> ---
> lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 12 ++++++---
> .../ds-negative-offset-addressing-mode-loop.ll | 2 +-
> test/CodeGen/R600/ds_read2.ll | 24 ++++++++---------
> test/CodeGen/R600/ds_read2st64.ll | 8 +++---
> test/CodeGen/R600/ds_write2.ll | 30 +++++++++++-----------
> test/CodeGen/R600/ds_write2st64.ll | 6 ++---
> test/CodeGen/R600/unaligned-load-store.ll | 4 +--
> 7 files changed, 45 insertions(+), 41 deletions(-)
>
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> index d62fd3f..c7f9da6 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -89,14 +89,18 @@ void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
>
> void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
> raw_ostream &O) {
> - O << " offset0:";
> - printU8ImmDecOperand(MI, OpNo, O);
> + if (MI->getOperand(OpNo).getImm()) {
> + O << " offset0:";
> + printU8ImmDecOperand(MI, OpNo, O);
> + }
> }
>
> void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
> raw_ostream &O) {
> - O << " offset1:";
> - printU8ImmDecOperand(MI, OpNo, O);
> + if (MI->getOperand(OpNo).getImm()) {
> + O << " offset1:";
> + printU8ImmDecOperand(MI, OpNo, O);
> + }
> }
>
> void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
> diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
> index c381fc4..e7e13d6 100644
> --- a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
> +++ b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
> @@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.barrier.local() #1
> ; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
> ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
>
> -; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
> +; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1
> ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
> ; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
> ; CHECK: s_endpgm
> diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll
> index f53b6c0..0f63026 100644
> --- a/test/CodeGen/R600/ds_read2.ll
> +++ b/test/CodeGen/R600/ds_read2.ll
> @@ -7,7 +7,7 @@
> @lds.f64 = addrspace(3) global [512 x double] undef, align 8
>
> ; SI-LABEL: @simple_read2_f32
> -; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
> +; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8
> ; SI: s_waitcnt lgkmcnt(0)
> ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
> ; SI: buffer_store_dword [[RESULT]]
> @@ -26,7 +26,7 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 {
> }
>
> ; SI-LABEL: @simple_read2_f32_max_offset
> -; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
> +; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255
> ; SI: s_waitcnt lgkmcnt(0)
> ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
> ; SI: buffer_store_dword [[RESULT]]
> @@ -63,7 +63,7 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
> }
>
> ; SI-LABEL: @simple_read2_f32_x2
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
> ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
> ; SI: s_endpgm
> define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
> @@ -94,7 +94,7 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
>
> ; Make sure there is an instruction between the two sets of reads.
> ; SI-LABEL: @simple_read2_f32_x2_barrier
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
> ; SI: s_barrier
> ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
> ; SI: s_endpgm
> @@ -313,7 +313,7 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs
>
> ; SI-LABEL: @simple_read2_f64
> ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
> -; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
> +; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset1:8
> ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
> ; SI: buffer_store_dwordx2 [[RESULT]]
> ; SI: s_endpgm
> @@ -331,7 +331,7 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 {
> }
>
> ; SI-LABEL: @simple_read2_f64_max_offset
> -; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
> +; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255
> ; SI: s_endpgm
> define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -366,7 +366,7 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
>
> ; Alignment only 4
> ; SI-LABEL: @misaligned_read2_f64
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
> ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
> ; SI: s_endpgm
> define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
> @@ -386,7 +386,7 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)
>
> ; SI-LABEL: @load_constant_adjacent_offsets
> ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
> define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
> %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
> %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
> @@ -397,7 +397,7 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
>
> ; SI-LABEL: @load_constant_disjoint_offsets
> ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2
> define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
> %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
> %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
> @@ -410,7 +410,7 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
>
> ; SI-LABEL: @load_misaligned64_constant_offsets
> ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
> ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
> define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
> %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
> @@ -425,8 +425,8 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
> ; SI-LABEL: @load_misaligned64_constant_large_offsets
> ; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
> ; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000
> -; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
> -; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
> +; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1
> +; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1
> ; SI: s_endpgm
> define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
> %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
> diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll
> index 482debb..54b3b45 100644
> --- a/test/CodeGen/R600/ds_read2st64.ll
> +++ b/test/CodeGen/R600/ds_read2st64.ll
> @@ -5,7 +5,7 @@
>
>
> ; SI-LABEL: @simple_read2st64_f32_0_1
> -; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
> ; SI: s_waitcnt lgkmcnt(0)
> ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
> ; SI: buffer_store_dword [[RESULT]]
> @@ -117,7 +117,7 @@ define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
> }
>
> ; SI-LABEL: @simple_read2st64_f64_0_1
> -; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
> ; SI: s_waitcnt lgkmcnt(0)
> ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
> ; SI: buffer_store_dwordx2 [[RESULT]]
> @@ -158,7 +158,7 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac
> ; Alignment only
>
> ; SI-LABEL: @misaligned_read2st64_f64
> -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
> +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
> ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
> ; SI: s_endpgm
> define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
> @@ -237,7 +237,7 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double
>
> ; SI-LABEL: @byte_size_only_divisible_64_read2_f64
> ; SI-NOT: ds_read2st_b64
> -; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
> +; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
> ; SI: s_endpgm
> define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/R600/ds_write2.ll
> index d06f780..60bcbcf 100644
> --- a/test/CodeGen/R600/ds_write2.ll
> +++ b/test/CodeGen/R600/ds_write2.ll
> @@ -7,7 +7,7 @@
> ; SI-LABEL: @simple_write2_one_val_f32
> ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
> ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
> +; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8
> ; SI: s_endpgm
> define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -25,7 +25,7 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1
> ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
> ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
> ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
> +; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
> ; SI: s_endpgm
> define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -84,7 +84,7 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float
> ; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
> ; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
> ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
> +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
> ; SI: s_endpgm
> define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -105,7 +105,7 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2
> ; SI-LABEL: @simple_write2_two_val_subreg2_f32
> ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
> ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
> +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
> ; SI: s_endpgm
> define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -124,7 +124,7 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa
> ; SI-LABEL: @simple_write2_two_val_subreg4_f32
> ; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
> ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
> +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
> ; SI: s_endpgm
> define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -144,7 +144,7 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa
> ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
> ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
> ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
> +; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
> ; SI: s_endpgm
> define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -179,7 +179,7 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add
> }
>
> ; SI-LABEL: @simple_write2_two_val_f32_x2
> -; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
> +; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8
> ; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
> ; SI: s_endpgm
> define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
> @@ -268,7 +268,7 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add
> ; SI-LABEL: @simple_write2_one_val_f64
> ; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
> ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
> -; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
> +; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8
> ; SI: s_endpgm
> define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -285,7 +285,7 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace
> ; SI-LABEL: @misaligned_simple_write2_one_val_f64
> ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
> ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
> -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1
> +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1
> ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
> ; SI: s_endpgm
> define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
> @@ -304,7 +304,7 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl
> ; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
> ; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
> ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
> -; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
> +; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
> ; SI: s_endpgm
> define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -324,7 +324,7 @@ define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace
>
> ; SI-LABEL: @store_constant_adjacent_offsets
> ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
> define void @store_constant_adjacent_offsets() {
> store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
> store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
> @@ -334,7 +334,7 @@ define void @store_constant_adjacent_offsets() {
> ; SI-LABEL: @store_constant_disjoint_offsets
> ; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
> ; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
> +; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2
> define void @store_constant_disjoint_offsets() {
> store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
> store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
> @@ -345,7 +345,7 @@ define void @store_constant_disjoint_offsets() {
>
> ; SI-LABEL: @store_misaligned64_constant_offsets
> ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> -; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
> ; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
> define void @store_misaligned64_constant_offsets() {
> store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
> @@ -358,8 +358,8 @@ define void @store_misaligned64_constant_offsets() {
> ; SI-LABEL: @store_misaligned64_constant_large_offsets
> ; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
> ; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
> -; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> -; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
> +; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
> ; SI: s_endpgm
> define void @store_misaligned64_constant_large_offsets() {
> store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
> diff --git a/test/CodeGen/R600/ds_write2st64.ll b/test/CodeGen/R600/ds_write2st64.ll
> index 2044df2..1d9d881 100644
> --- a/test/CodeGen/R600/ds_write2st64.ll
> +++ b/test/CodeGen/R600/ds_write2st64.ll
> @@ -7,7 +7,7 @@
> ; SI-LABEL: @simple_write2st64_one_val_f32_0_1
> ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
> ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1
> +; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1
> ; SI: s_endpgm
> define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -46,7 +46,7 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add
> ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
> ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
> ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
> -; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
> +; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
> ; SI: s_endpgm
> define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> @@ -85,7 +85,7 @@ define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, d
>
> ; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
> ; SI-NOT: ds_write2st64_b64
> -; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
> +; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8
> ; SI: s_endpgm
> define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
> %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
> diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
> index efb1de2..82d88eb 100644
> --- a/test/CodeGen/R600/unaligned-load-store.ll
> +++ b/test/CodeGen/R600/unaligned-load-store.ll
> @@ -195,7 +195,7 @@ define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out,
>
> ; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
> ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
> -; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
> +; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
> ; SI: s_endpgm
> define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
> %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
> @@ -243,7 +243,7 @@ define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
>
> ; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
> ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
> -; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
> ; SI: s_endpgm
> define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
> %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
> --
> 1.8.1.5
>
> From e8a8d61e6b2df9c251914220536e281dade8b35c Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Fri, 20 Mar 2015 16:11:57 -0400
> Subject: [PATCH 2/3] R600/SI: Add missing SOPK instructions
>
> ---
> lib/Target/R600/SIInstrFormats.td | 13 ++++++++++++
> lib/Target/R600/SIInstrInfo.td | 44 +++++++++++++++++++++++++++++++++++----
> lib/Target/R600/SIInstructions.td | 28 +++++++++++++++++--------
> 3 files changed, 72 insertions(+), 13 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 4167590..e7a07a1 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -181,6 +181,19 @@ class SOPKe <bits<5> op> : Enc32 {
> let Inst{31-28} = 0xb; //encoding
> }
>
> +class SOPK64e <bits<5> op> : Enc64 {
> + bits <7> sdst = 0;
> + bits <16> simm16;
> + bits <32> imm;
> +
> + let Inst{15-0} = simm16;
> + let Inst{22-16} = sdst;
> + let Inst{27-23} = op;
> + let Inst{31-28} = 0xb;
> +
> + let Inst{63-32} = imm;
> +}
> +
> class SOPPe <bits<7> op> : Enc32 {
> bits <16> simm16;
>
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 86e1082..345e699 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -546,6 +546,16 @@ class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
> SOPKe <op.VI>,
> SIMCInstr<opName, SISubtarget.VI>;
>
> +multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm,
> + string asm = opName#opAsm> {
> + def "" : SOPK_Pseudo <opName, outs, ins, []>;
> +
> + def _si : SOPK_Real_si <op, opName, outs, ins, asm>;
> +
> + def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>;
> +
> +}
> +
> multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
> def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0),
> pattern>;
> @@ -561,13 +571,39 @@ multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
> def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst),
> (ins SReg_32:$src0, u16imm:$src1), pattern>;
>
> - def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
> - (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
> + let DisableEncoding = "$dst" in {
> + def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
> + (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
>
> - def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
> - (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
> + def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
> + (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
> + }
> }
>
> +multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m <
> + op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16),
> + " $sdst, $simm16"
> +>;
> +
> +multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins,
> + string argAsm, string asm = opName#argAsm> {
> +
> + def "" : SOPK_Pseudo <opName, outs, ins, []>;
> +
> + def _si : SOPK <outs, ins, asm, []>,
> + SOPK64e <op.SI>,
> + SIMCInstr<opName, SISubtarget.SI> {
> + let AssemblerPredicates = [isSICI];
> + let isCodeGenOnly = 0;
> + }
> +
> + def _vi : SOPK <outs, ins, asm, []>,
> + SOPK64e <op.VI>,
> + SIMCInstr<opName, SISubtarget.VI> {
> + let AssemblerPredicates = [isVI];
> + let isCodeGenOnly = 0;
> + }
> +}
> //===----------------------------------------------------------------------===//
> // SMRD classes
> //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 5f02a31..d6e4986 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -384,6 +384,7 @@ defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32",
> >;
> */
>
> +defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>;
> defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>;
> defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>;
> defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>;
> @@ -397,18 +398,27 @@ defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>;
> defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>;
> } // End isCompare = 1
>
> -let isCommutable = 1 in {
> - let Defs = [SCC], isCommutable = 1 in {
> - defm S_ADDK_I32 : SOPK_32 <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
> - }
> - defm S_MULK_I32 : SOPK_32 <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
> +let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
> + Constraints = "$sdst = $src0" in {
> + defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
> + defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
> }
>
> -//defm S_CBRANCH_I_FORK : SOPK_ <sopk<0x11, 0x10>, "s_cbranch_i_fork", []>;
> +defm S_CBRANCH_I_FORK : SOPK_m <
> + sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs),
> + (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16"
> +>;
> defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>;
> -defm S_SETREG_B32 : SOPK_32 <sopk<0x13, 0x12>, "s_setreg_b32", []>;
> -defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
> -//defm S_SETREG_IMM32_B32 : SOPK_32 <sopk<0x15, 0x14>, "s_setreg_imm32_b32", []>;
> +defm S_SETREG_B32 : SOPK_m <
> + sopk<0x13, 0x12>, "s_setreg_b32", (outs),
> + (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16"
> +>;
> +// FIXME: Not on SI?
> +//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
> +defm S_SETREG_IMM32_B32 : SOPK_IMM32 <
> + sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs),
> + (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16"
> +>;
>
> //===----------------------------------------------------------------------===//
> // SOPP Instructions
> --
> 1.8.1.5
>
> From 86d7bb8f3b90415db396ac688e42ac89fe9ac208 Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Fri, 14 Nov 2014 06:22:05 -0500
> Subject: [PATCH 3/3] R600/SI: Initial support for assembler and inline
> assembly
>
> This is currently considered experimental, but most of the more
> commonly used instructions should work.
>
> So far only SI has been extensively tested; CI and VI probably work too,
> but may be buggy. The current set of test cases does not give complete
> coverage, but I think it is sufficient for an experimental assembler.
>
> See the documentation in R600Usage for more information.
> ---
> docs/R600Usage.rst | 60 +-
> lib/Target/R600/AMDGPU.td | 24 +-
> lib/Target/R600/AMDGPUAsmPrinter.cpp | 22 +
> lib/Target/R600/AMDGPUAsmPrinter.h | 4 +
> lib/Target/R600/AMDGPUSubtarget.cpp | 1 +
> lib/Target/R600/AMDGPUSubtarget.h | 3 +
> lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp | 1095 +++++++++++++++++++--
> lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 5 +-
> lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 2 +
> lib/Target/R600/SIISelLowering.cpp | 35 +
> lib/Target/R600/SIISelLowering.h | 4 +
> lib/Target/R600/SIInstrFormats.td | 18 +-
> lib/Target/R600/SIInstrInfo.td | 228 ++++-
> lib/Target/R600/SIInstructions.td | 15 +-
> lib/Target/R600/SIRegisterInfo.td | 47 +-
> test/MC/R600/ds-err.s | 23 +
> test/MC/R600/ds.s | 337 +++++++
> test/MC/R600/mubuf.s | 352 +++++++
> test/MC/R600/smrd.s | 32 +
> test/MC/R600/sop1-err.s | 37 +
> test/MC/R600/sop1.s | 177 ++++
> test/MC/R600/sop2.s | 131 +++
> test/MC/R600/sopc.s | 9 +
> test/MC/R600/sopk.s | 66 ++
> test/MC/R600/sopp.s | 14 +-
> test/MC/R600/vop1.s | 182 ++++
> test/MC/R600/vop2-err.s | 35 +
> test/MC/R600/vop2.s | 242 +++++
> test/MC/R600/vop3.s | 138 +++
> test/MC/R600/vopc.s | 40 +
> 30 files changed, 3236 insertions(+), 142 deletions(-)
> create mode 100644 test/MC/R600/ds-err.s
> create mode 100644 test/MC/R600/ds.s
> create mode 100644 test/MC/R600/mubuf.s
> create mode 100644 test/MC/R600/smrd.s
> create mode 100644 test/MC/R600/sop1-err.s
> create mode 100644 test/MC/R600/sop1.s
> create mode 100644 test/MC/R600/sop2.s
> create mode 100644 test/MC/R600/sopc.s
> create mode 100644 test/MC/R600/sopk.s
> create mode 100644 test/MC/R600/vop1.s
> create mode 100644 test/MC/R600/vop2-err.s
> create mode 100644 test/MC/R600/vop2.s
> create mode 100644 test/MC/R600/vop3.s
> create mode 100644 test/MC/R600/vopc.s
>
> diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst
> index 48a30c8..093cdd7 100644
> --- a/docs/R600Usage.rst
> +++ b/docs/R600Usage.rst
> @@ -6,22 +6,51 @@ Introduction
> ============
>
> The R600 back-end provides ISA code generation for AMD GPUs, starting with
> -the R600 family up until the current Sea Islands (GCN Gen 2).
> +the R600 family up until the current Volcanic Islands (GCN Gen 3).
>
>
> Assembler
> =========
>
> -The assembler is currently a work in progress and not yet complete. Below
> -are the currently supported features.
> +The assembler is currently considered experimental.
> +
> +For syntax examples look in test/MC/R600.
> +
> +Below are some of the currently supported features (modulo bugs). These
> +all apply to the Southern Islands ISA. Sea Islands and Volcanic Islands
> +are also supported but may be missing some instructions and have more bugs:
> +
> +DS Instructions
> +---------------
> +All DS instructions are supported.
> +
> +MUBUF Instructions
> +------------------
> +All non-atomic MUBUF instructions are supported.
> +
> +SMRD Instructions
> +-----------------
> +Only the s_load_dword* SMRD instructions are supported.
> +
> +SOP1 Instructions
> +-----------------
> +All SOP1 instructions are supported.
> +
> +SOP2 Instructions
> +-----------------
> +All SOP2 instructions are supported.
> +
> +SOPC Instructions
> +-----------------
> +All SOPC instructions are supported.
>
> SOPP Instructions
> -----------------
>
> -Unless otherwise mentioned, all SOPP instructions that with an operand
> -accept a integer operand(s) only. No verification is performed on the
> -operands, so it is up to the programmer to be familiar with the range
> -or acceptable values.
> +Unless otherwise mentioned, all SOPP instructions that have one or more
> +operands accept integer operands only. No verification is performed
> +on the operands, so it is up to the programmer to be familiar with the
> +range of acceptable values.
>
> s_waitcnt
> ^^^^^^^^^
> @@ -41,3 +70,20 @@ wait for.
> // Wait for vmcnt counter to be 1.
> s_waitcnt vmcnt(1)
>
> +VOP1, VOP2, VOP3, VOPC Instructions
> +-----------------------------------
> +
> +All 32-bit and 64-bit encodings should work.
> +
> +The assembler will automatically detect which encoding size to use for
> +VOP1, VOP2, and VOPC instructions based on the operands. If you want to force
> +a specific encoding size, you can add an _e32 (for 32-bit encoding) or
> +_e64 (for 64-bit encoding) suffix to the instruction. Most, but not all
> +instructions support an explicit suffix. These are all valid assembly
> +strings:
> +
> +.. code-block:: nasm
> +
> + v_mul_i32_i24 v1, v2, v3
> + v_mul_i32_i24_e32 v1, v2, v3
> + v_mul_i32_i24_e64 v1, v2, v3
> diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
> index e5d5ce2..2eb805e 100644
> --- a/lib/Target/R600/AMDGPU.td
> +++ b/lib/Target/R600/AMDGPU.td
> @@ -133,6 +133,20 @@ class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
> !cast<string>(Value),
> "The size of local memory in bytes">;
>
> +def FeatureGCN : SubtargetFeature<"gcn",
> + "IsGCN",
> + "true",
> + "GCN or newer GPU">;
> +
> +def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
> + "GCN1Encoding",
> + "true",
> + "Encoding format for SI and CI">;
> +
> +def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
> + "GCN3Encoding",
> + "true",
> + "Encoding format for VI">;
> class SubtargetFeatureGeneration <string Value,
> list<SubtargetFeature> Implies> :
> SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
> @@ -158,15 +172,17 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
>
> def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
> [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
> - FeatureWavefrontSize64]>;
> + FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding]>;
>
> def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
> [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
> - FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
> + FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
> + FeatureGCN1Encoding]>;
>
> def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
> [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
> - FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
> + FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
> + FeatureGCN3Encoding]>;
>
> //===----------------------------------------------------------------------===//
>
> @@ -197,8 +213,10 @@ def NullALU : InstrItinClass;
>
> class PredicateControl {
> Predicate SubtargetPredicate;
> + list<Predicate> AssemblerPredicates = [];
> list<Predicate> OtherPredicates = [];
> list<Predicate> Predicates = !listconcat([SubtargetPredicate],
> + AssemblerPredicates,
> OtherPredicates);
> }
>
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> index 5e1b6a3..b7a48c3 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> @@ -17,6 +17,7 @@
> //
>
> #include "AMDGPUAsmPrinter.h"
> +#include "InstPrinter/AMDGPUInstPrinter.h"
> #include "AMDGPU.h"
> #include "AMDKernelCodeT.h"
> #include "AMDGPUSubtarget.h"
> @@ -577,3 +578,24 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
>
> OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header)));
> }
> +
> +bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
> + unsigned AsmVariant,
> + const char *ExtraCode, raw_ostream &O) {
> + if (ExtraCode && ExtraCode[0]) {
> + if (ExtraCode[1] != 0)
> + return true; // Unknown modifier.
> +
> + switch (ExtraCode[0]) {
> + default:
> + // See if this is a generic print operand
> + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
> + case 'r':
> + break;
> + }
> + }
> +
> + AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
> + *TM.getSubtargetImpl()->getRegisterInfo());
> + return false;
> +}
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
> index 58ffb1e..824cc43 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.h
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.h
> @@ -99,6 +99,10 @@ public:
>
> void EmitEndOfAsmFile(Module &M) override;
>
> + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
> + unsigned AsmVariant, const char *ExtraCode,
> + raw_ostream &O);
> +
> protected:
> std::vector<std::string> DisasmLines, HexLines;
> size_t DisasmLineMaxLen;
> diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
> index 0ead652..259224a 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.cpp
> +++ b/lib/Target/R600/AMDGPUSubtarget.cpp
> @@ -71,6 +71,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
> EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
> WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
> EnableVGPRSpilling(false), SGPRInitBug(false),
> + IsGCN(false), GCN1Encoding(false), GCN3Encoding(false),
> FrameLowering(TargetFrameLowering::StackGrowsUp,
> 64 * 16, // Maximum stack alignment (long16)
> 0),
> diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
> index 403a3e4..aeb0817 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.h
> +++ b/lib/Target/R600/AMDGPUSubtarget.h
> @@ -71,6 +71,9 @@ private:
> int LocalMemorySize;
> bool EnableVGPRSpilling;
> bool SGPRInitBug;
> + bool IsGCN;
> + bool GCN1Encoding;
> + bool GCN3Encoding;
>
> AMDGPUFrameLowering FrameLowering;
> std::unique_ptr<AMDGPUTargetLowering> TLInfo;
> diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> index 3b4ba1a..bd202a1 100644
> --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> @@ -8,6 +8,8 @@
> //===----------------------------------------------------------------------===//
>
> #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
> +#include "SIDefines.h"
> +#include "llvm/ADT/APFloat.h"
> #include "llvm/ADT/SmallString.h"
> #include "llvm/ADT/SmallVector.h"
> #include "llvm/ADT/STLExtras.h"
> @@ -27,77 +29,105 @@
> #include "llvm/Support/SourceMgr.h"
> #include "llvm/Support/TargetRegistry.h"
> #include "llvm/Support/raw_ostream.h"
> +#include "llvm/Support/Debug.h"
>
> using namespace llvm;
>
> namespace {
>
> -class AMDGPUAsmParser : public MCTargetAsmParser {
> - MCSubtargetInfo &STI;
> - MCAsmParser &Parser;
> -
> -
> - /// @name Auto-generated Match Functions
> - /// {
> -
> -#define GET_ASSEMBLER_HEADER
> -#include "AMDGPUGenAsmMatcher.inc"
> -
> - /// }
> -
> -public:
> - AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
> - const MCInstrInfo &_MII,
> - const MCTargetOptions &Options)
> - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
> - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
> - }
> - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
> - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
> - OperandVector &Operands, MCStreamer &Out,
> - uint64_t &ErrorInfo,
> - bool MatchingInlineAsm) override;
> - bool ParseDirective(AsmToken DirectiveID) override;
> - OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
> - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
> - SMLoc NameLoc, OperandVector &Operands) override;
> -
> - bool parseCnt(int64_t &IntVal);
> - OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
> -};
> +struct OptionalOperand;
>
> class AMDGPUOperand : public MCParsedAsmOperand {
> enum KindTy {
> Token,
> - Immediate
> + Immediate,
> + Register,
> + Expression
> } Kind;
>
> + SMLoc StartLoc, EndLoc;
> +
> public:
> AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
>
> + MCContext *Ctx;
> +
> + enum ImmTy {
> + ImmTyNone,
> + ImmTyDSOffset0,
> + ImmTyDSOffset1,
> + ImmTyGDS,
> + ImmTyOffset,
> + ImmTyGLC,
> + ImmTySLC,
> + ImmTyTFE,
> + ImmTyClamp,
> + ImmTyOMod
> + };
> +
> struct TokOp {
> const char *Data;
> unsigned Length;
> };
>
> struct ImmOp {
> + bool IsFPImm;
> + ImmTy Type;
> int64_t Val;
> };
>
> + struct RegOp {
> + unsigned RegNo;
> + int Modifiers;
> + const MCRegisterInfo *TRI;
> + };
> +
> union {
> TokOp Tok;
> ImmOp Imm;
> + RegOp Reg;
> + const MCExpr *Expr;
> };
>
> void addImmOperands(MCInst &Inst, unsigned N) const {
> Inst.addOperand(MCOperand::CreateImm(getImm()));
> }
> - void addRegOperands(MCInst &Inst, unsigned N) const {
> - llvm_unreachable("addRegOperands");
> - }
> +
> StringRef getToken() const {
> return StringRef(Tok.Data, Tok.Length);
> }
> +
> + void addRegOperands(MCInst &Inst, unsigned N) const {
> + Inst.addOperand(MCOperand::CreateReg(getReg()));
> + }
> +
> + void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
> + if (isReg())
> + addRegOperands(Inst, N);
> + else
> + addImmOperands(Inst, N);
> + }
> +
> + void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
> + Inst.addOperand(MCOperand::CreateImm(Reg.Modifiers));
> + addRegOperands(Inst, N);
> + }
> +
> + void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
> + if (isImm())
> + addImmOperands(Inst, N);
> + else {
> + assert(isExpr());
> + Inst.addOperand(MCOperand::CreateExpr(Expr));
> + }
> + }
> +
> + bool defaultTokenHasSuffix() const {
> + StringRef Token(Tok.Data, Tok.Length);
> +
> + return Token.endswith("_e32") || Token.endswith("_e64");
> + }
> +
> bool isToken() const override {
> return Kind == Token;
> }
> @@ -106,52 +136,369 @@ public:
> return Kind == Immediate;
> }
>
> + bool isInlineImm() const {
> + float F = BitsToFloat(Imm.Val);
> + // TODO: Add 0.5pi for VI
> + return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) ||
> + (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
> + F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0));
> + }
> +
> + bool isDSOffset0() const {
> + assert(isImm());
> + return Imm.Type == ImmTyDSOffset0;
> + }
> +
> + bool isDSOffset1() const {
> + assert(isImm());
> + return Imm.Type == ImmTyDSOffset1;
> + }
> +
> int64_t getImm() const {
> return Imm.Val;
> }
>
> + enum ImmTy getImmTy() const {
> + assert(isImm());
> + return Imm.Type;
> + }
> +
> bool isReg() const override {
> - return false;
> + return Kind == Register && Reg.Modifiers == -1;
> + }
> +
> + bool isRegWithInputMods() const {
> + return Kind == Register && Reg.Modifiers != -1;
> + }
> +
> + void setModifiers(unsigned Mods) {
> + assert(isReg());
> + Reg.Modifiers = Mods;
> }
>
> unsigned getReg() const override {
> - return 0;
> + return Reg.RegNo;
> + }
> +
> + bool isRegOrImm() const {
> + return isReg() || isImm();
> + }
> +
> + bool isRegClass(unsigned RCID) const {
> + return Reg.TRI->getRegClass(RCID).contains(getReg());
> + }
> +
> + bool isSCSrc32() const {
> + return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
> + }
> +
> + bool isSSrc32() const {
> + return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
> + }
> +
> + bool isSSrc64() const {
> + return isImm() || isInlineImm() ||
> + (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
> + }
> +
> + bool isVCSrc32() const {
> + return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
> + }
> +
> + bool isVCSrc64() const {
> + return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
> + }
> +
> + bool isVSrc32() const {
> + return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
> + }
> +
> + bool isVSrc64() const {
> + return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
> }
>
> bool isMem() const override {
> return false;
> }
>
> + bool isExpr() const {
> + return Kind == Expression;
> + }
> +
> + bool isSoppBrTarget() const {
> + return isExpr() || isImm();
> + }
> +
> SMLoc getStartLoc() const override {
> - return SMLoc();
> + return StartLoc;
> }
>
> SMLoc getEndLoc() const override {
> - return SMLoc();
> + return EndLoc;
> }
>
> void print(raw_ostream &OS) const override { }
>
> - static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) {
> + static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
> + enum ImmTy Type = ImmTyNone,
> + bool IsFPImm = false) {
> auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
> Op->Imm.Val = Val;
> + Op->Imm.IsFPImm = IsFPImm;
> + Op->Imm.Type = Type;
> + Op->StartLoc = Loc;
> + Op->EndLoc = Loc;
> return Op;
> }
>
> - static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) {
> + static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc,
> + bool HasExplicitEncodingSize = true) {
> auto Res = llvm::make_unique<AMDGPUOperand>(Token);
> Res->Tok.Data = Str.data();
> Res->Tok.Length = Str.size();
> + Res->StartLoc = Loc;
> + Res->EndLoc = Loc;
> return Res;
> }
>
> + static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
> + SMLoc E,
> + const MCRegisterInfo *TRI) {
> + auto Op = llvm::make_unique<AMDGPUOperand>(Register);
> + Op->Reg.RegNo = RegNo;
> + Op->Reg.TRI = TRI;
> + Op->Reg.Modifiers = -1;
> + Op->StartLoc = S;
> + Op->EndLoc = E;
> + return Op;
> + }
> +
> + static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) {
> + auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
> + Op->Expr = Expr;
> + Op->StartLoc = S;
> + Op->EndLoc = S;
> + return Op;
> + }
> +
> + bool isDSOffset() const;
> + bool isDSOffset01() const;
> bool isSWaitCnt() const;
> + bool isMubufOffset() const;
> +};
> +
> +class AMDGPUAsmParser : public MCTargetAsmParser {
> + MCSubtargetInfo &STI;
> + const MCInstrInfo &MII;
> + MCAsmParser &Parser;
> +
> + unsigned ForcedEncodingSize;
> + /// @name Auto-generated Match Functions
> + /// {
> +
> +#define GET_ASSEMBLER_HEADER
> +#include "AMDGPUGenAsmMatcher.inc"
> +
> + /// }
> +
> +public:
> + AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
> + const MCInstrInfo &_MII,
> + const MCTargetOptions &Options)
> + : MCTargetAsmParser(), STI(_STI), MII(_MII), Parser(_Parser),
> + ForcedEncodingSize(0){
> +
> + if (!STI.getFeatureBits()) {
> + // Set default features.
> + STI.ToggleFeature("SOUTHERN_ISLANDS");
> + }
> +
> + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
> + }
> +
> + unsigned getForcedEncodingSize() const {
> + return ForcedEncodingSize;
> + }
> +
> + void setForcedEncodingSize(unsigned Size) {
> + ForcedEncodingSize = Size;
> + }
> +
> + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
> + unsigned checkTargetMatchPredicate(MCInst &Inst) override;
> + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
> + OperandVector &Operands, MCStreamer &Out,
> + uint64_t &ErrorInfo,
> + bool MatchingInlineAsm) override;
> + bool ParseDirective(AsmToken DirectiveID) override;
> + OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
> + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
> + SMLoc NameLoc, OperandVector &Operands) override;
> +
> + OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int,
> + int64_t Default = 0);
> + OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
> + OperandVector &Operands,
> + enum AMDGPUOperand::ImmTy ImmTy =
> + AMDGPUOperand::ImmTyNone);
> + OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
> + enum AMDGPUOperand::ImmTy ImmTy =
> + AMDGPUOperand::ImmTyNone);
> + OperandMatchResultTy parseOptionalOps(
> + const ArrayRef<OptionalOperand> &OptionalOps,
> + OperandVector &Operands);
> +
> +
> + void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
> + void cvtDS(MCInst &Inst, const OperandVector &Operands);
> + OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands);
> + OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands);
> + OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands);
> +
> + bool parseCnt(int64_t &IntVal);
> + OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
> + OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
> +
> + void cvtMubuf(MCInst &Inst, const OperandVector &Operands);
> + OperandMatchResultTy parseOffset(OperandVector &Operands);
> + OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands);
> + OperandMatchResultTy parseGLC(OperandVector &Operands);
> + OperandMatchResultTy parseSLC(OperandVector &Operands);
> + OperandMatchResultTy parseTFE(OperandVector &Operands);
> +
> + OperandMatchResultTy parseDMask(OperandVector &Operands);
> + OperandMatchResultTy parseUNorm(OperandVector &Operands);
> + OperandMatchResultTy parseR128(OperandVector &Operands);
> +
> + void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
> + OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands);
> +};
> +
> +struct OptionalOperand {
> + const char *Name;
> + AMDGPUOperand::ImmTy Type;
> + bool IsBit;
> + int64_t Default;
> + bool (*ConvertResult)(int64_t&);
> };
>
> }
>
> +static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
> + if (IsVgpr) {
> + switch (RegWidth) {
> + default: llvm_unreachable("Unknown register width");
> + case 1: return AMDGPU::VGPR_32RegClassID;
> + case 2: return AMDGPU::VReg_64RegClassID;
> + case 3: return AMDGPU::VReg_96RegClassID;
> + case 4: return AMDGPU::VReg_128RegClassID;
> + case 8: return AMDGPU::VReg_256RegClassID;
> + case 16: return AMDGPU::VReg_512RegClassID;
> + }
> + }
> +
> + switch (RegWidth) {
> + default: llvm_unreachable("Unknown register width");
> + case 1: return AMDGPU::SGPR_32RegClassID;
> + case 2: return AMDGPU::SGPR_64RegClassID;
> + case 4: return AMDGPU::SReg_128RegClassID;
> + case 8: return AMDGPU::SReg_256RegClassID;
> + case 16: return AMDGPU::SReg_512RegClassID;
> + }
> +}
> +
> +static unsigned getRegForName(const StringRef &RegName) {
> +
> + return StringSwitch<unsigned>(RegName)
> + .Case("exec", AMDGPU::EXEC)
> + .Case("vcc", AMDGPU::VCC)
> + .Case("flat_scr", AMDGPU::FLAT_SCR)
> + .Case("m0", AMDGPU::M0)
> + .Case("scc", AMDGPU::SCC)
> + .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
> + .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
> + .Case("vcc_lo", AMDGPU::VCC_LO)
> + .Case("vcc_hi", AMDGPU::VCC_HI)
> + .Case("exec_lo", AMDGPU::EXEC_LO)
> + .Case("exec_hi", AMDGPU::EXEC_HI)
> + .Default(0);
> +}
> +
> bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
> - return true;
> + const AsmToken Tok = Parser.getTok();
> + StartLoc = Tok.getLoc();
> + EndLoc = Tok.getEndLoc();
> + const StringRef &RegName = Tok.getString();
> + RegNo = getRegForName(RegName);
> +
> + if (RegNo) {
> + Parser.Lex();
> + return false;
> + }
> +
> + // Match vgprs and sgprs
> + if (RegName[0] != 's' && RegName[0] != 'v')
> + return true;
> +
> + bool IsVgpr = RegName[0] == 'v';
> + unsigned RegWidth;
> + unsigned RegIndexInClass;
> + if (RegName.size() > 1) {
> + // We have a 32-bit register
> + RegWidth = 1;
> + if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
> + return true;
> + Parser.Lex();
> + } else {
> + // We have a register greater than 32-bits.
> +
> + int64_t RegLo, RegHi;
> + Parser.Lex();
> + if (getLexer().isNot(AsmToken::LBrac))
> + return true;
> +
> + Parser.Lex();
> + if (getParser().parseAbsoluteExpression(RegLo))
> + return true;
> +
> + if (getLexer().isNot(AsmToken::Colon))
> + return true;
> +
> + Parser.Lex();
> + if (getParser().parseAbsoluteExpression(RegHi))
> + return true;
> +
> + if (getLexer().isNot(AsmToken::RBrac))
> + return true;
> +
> + Parser.Lex();
> + RegWidth = (RegHi - RegLo) + 1;
> + if (IsVgpr) {
> + // VGPR registers aren't aligned.
> + RegIndexInClass = RegLo;
> + } else {
> + // SGPR registers are aligned. Max alignment is 4 dwords.
> + RegIndexInClass = RegLo / std::min(RegWidth, 4u);
> + }
> + }
> +
> + const MCRegisterInfo *TRC = getContext().getRegisterInfo();
> + unsigned RC = getRegClass(IsVgpr, RegWidth);
> + if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs())
> + return true;
> + RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
> + return false;
> +}
> +
> +unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
> +
> + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
> +
> + if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
> + (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
> + return Match_InvalidOperand;
> +
> + return Match_Success;
> }
>
>
> @@ -163,22 +510,30 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
> MCInst Inst;
>
> switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
> - case Match_Success:
> - Inst.setLoc(IDLoc);
> - Out.EmitInstruction(Inst, STI);
> - return false;
> - case Match_MissingFeature:
> - return Error(IDLoc, "instruction use requires an option to be enabled");
> - case Match_MnemonicFail:
> - return Error(IDLoc, "unrecognized instruction mnemonic");
> - case Match_InvalidOperand: {
> - if (ErrorInfo != ~0ULL) {
> - if (ErrorInfo >= Operands.size())
> - return Error(IDLoc, "too few operands for instruction");
> + default: break;
> + case Match_Success:
> + Inst.setLoc(IDLoc);
> + Out.EmitInstruction(Inst, STI);
> + return false;
> + case Match_MissingFeature:
> + return Error(IDLoc, "missing feature");
> +
> + case Match_MnemonicFail:
> + return Error(IDLoc, "unrecognized instruction mnemonic");
>
> + case Match_InvalidOperand: {
> + SMLoc ErrorLoc = IDLoc;
> + if (ErrorInfo != ~0ULL) {
> + if (ErrorInfo >= Operands.size()) {
> + return Error(IDLoc, "too few operands for instruction");
> + }
> +
> + ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
> + if (ErrorLoc == SMLoc())
> + ErrorLoc = IDLoc;
> + }
> + return Error(ErrorLoc, "invalid operand for instruction");
> }
> - return Error(IDLoc, "invalid operand for instruction");
> - }
> }
> llvm_unreachable("Implement any new match types added!");
> }
> @@ -187,6 +542,19 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
> return true;
> }
>
> +static bool operandsHaveModifiers(const OperandVector &Operands) {
> +
> + for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
> + const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
> + if (Op.isRegWithInputMods())
> + return true;
> + if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod ||
> + Op.getImmTy() == AMDGPUOperand::ImmTyClamp))
> + return true;
> + }
> + return false;
> +}
> +
> AMDGPUAsmParser::OperandMatchResultTy
> AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>
> @@ -195,17 +563,104 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>
> // If we successfully parsed the operand or if there as an error parsing,
> // we are done.
> - if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
> + //
> + // If we are parsing after we reach EndOfStatement then this means we
> + // are appending default values to the Operands list. This is only done
> + // by custom parser, so we shouldn't continue on to the generic parsing.
> + if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
> + getLexer().is(AsmToken::EndOfStatement))
> return ResTy;
>
> + bool Negate = false, Abs = false;
> + if (getLexer().getKind()== AsmToken::Minus) {
> + Parser.Lex();
> + Negate = true;
> + }
> +
> + if (getLexer().getKind() == AsmToken::Pipe) {
> + Parser.Lex();
> + Abs = true;
> + }
> +
> switch(getLexer().getKind()) {
> case AsmToken::Integer: {
> + SMLoc S = Parser.getTok().getLoc();
> int64_t IntVal;
> if (getParser().parseAbsoluteExpression(IntVal))
> return MatchOperand_ParseFail;
> - Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
> + APInt IntVal32(32, IntVal);
> + if (IntVal32.getSExtValue() != IntVal) {
> + Error(S, "invalid immediate: only 32-bit values are legal");
> + return MatchOperand_ParseFail;
> + }
> +
> + IntVal = IntVal32.getSExtValue();
> + if (Negate)
> + IntVal *= -1;
> + Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
> return MatchOperand_Success;
> }
> + case AsmToken::Real: {
> +  case AsmToken::Real: {
> +    // FIXME: We should emit an error if a double precision floating-point
> + // value is used. I'm not sure the best way to detect this.
> + SMLoc S = Parser.getTok().getLoc();
> + int64_t IntVal;
> + if (getParser().parseAbsoluteExpression(IntVal))
> + return MatchOperand_ParseFail;
> +
> + APFloat F((float)BitsToDouble(IntVal));
> + if (Negate)
> + F.changeSign();
> + Operands.push_back(
> + AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
> + return MatchOperand_Success;
> + }
> + case AsmToken::Identifier: {
> + SMLoc S, E;
> + unsigned RegNo;
> + if (!ParseRegister(RegNo, S, E)) {
> +
> + bool HasModifiers = operandsHaveModifiers(Operands);
> + unsigned Modifiers = 0;
> +
> + if (Negate)
> + Modifiers |= 0x1;
> +
> + if (Abs) {
> + if (getLexer().getKind() != AsmToken::Pipe)
> + return MatchOperand_ParseFail;
> + Parser.Lex();
> + Modifiers |= 0x2;
> + }
> +
> + if (Modifiers && !HasModifiers) {
> +        // We are adding a modifier to src1 or src2 and previous sources
> +        // don't have modifiers, so we need to go back and add empty modifiers
> + // for each previous source.
> + for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1;
> + --PrevRegIdx) {
> +
> + AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]);
> + RegOp.setModifiers(0);
> + }
> + }
> +
> +
> + Operands.push_back(AMDGPUOperand::CreateReg(
> + RegNo, S, E, getContext().getRegisterInfo()));
> +
> + if (HasModifiers || Modifiers) {
> + AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]);
> + RegOp.setModifiers(Modifiers);
> +
> + }
> + } else {
> + Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(),
> + S));
> + Parser.Lex();
> + }
> + return MatchOperand_Success;
> + }
> default:
> return MatchOperand_NoMatch;
> }
> @@ -214,23 +669,283 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
> bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
> StringRef Name,
> SMLoc NameLoc, OperandVector &Operands) {
> +
> + // Clear any forced encodings from the previous instruction.
> + setForcedEncodingSize(0);
> +
> + if (Name.endswith("_e64"))
> + setForcedEncodingSize(64);
> + else if (Name.endswith("_e32"))
> + setForcedEncodingSize(32);
> +
> // Add the instruction mnemonic
> Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
>
> - if (getLexer().is(AsmToken::EndOfStatement))
> - return false;
> + while (!getLexer().is(AsmToken::EndOfStatement)) {
> + AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
>
> - AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
> - switch (Res) {
> - case MatchOperand_Success: return false;
> - case MatchOperand_ParseFail: return Error(NameLoc,
> - "Failed parsing operand");
> - case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand");
> + // Eat the comma or space if there is one.
> + if (getLexer().is(AsmToken::Comma))
> + Parser.Lex();
> +
> + switch (Res) {
> + case MatchOperand_Success: break;
> + case MatchOperand_ParseFail: return Error(getLexer().getLoc(),
> + "failed parsing operand.");
> + case MatchOperand_NoMatch: return Error(getLexer().getLoc(),
> + "not a valid operand.");
> + }
> }
> - return true;
> +
> + // Once we reach end of statement, continue parsing so we can add default
> + // values for optional arguments.
> + AMDGPUAsmParser::OperandMatchResultTy Res;
> + while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) {
> + if (Res != MatchOperand_Success)
> + return Error(getLexer().getLoc(), "failed parsing operand.");
> + }
> + return false;
> }
>
> //===----------------------------------------------------------------------===//
> +// Utility functions
> +//===----------------------------------------------------------------------===//
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int,
> + int64_t Default) {
> +
> + // We are at the end of the statement, and this is a default argument, so
> + // use a default value.
> + if (getLexer().is(AsmToken::EndOfStatement)) {
> + Int = Default;
> + return MatchOperand_Success;
> + }
> +
> + switch(getLexer().getKind()) {
> + default: return MatchOperand_NoMatch;
> + case AsmToken::Identifier: {
> + StringRef OffsetName = Parser.getTok().getString();
> + if (!OffsetName.equals(Prefix))
> + return MatchOperand_NoMatch;
> +
> + Parser.Lex();
> + if (getLexer().isNot(AsmToken::Colon))
> + return MatchOperand_ParseFail;
> +
> + Parser.Lex();
> + if (getLexer().isNot(AsmToken::Integer))
> + return MatchOperand_ParseFail;
> +
> + if (getParser().parseAbsoluteExpression(Int))
> + return MatchOperand_ParseFail;
> + break;
> + }
> + }
> + return MatchOperand_Success;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
> + enum AMDGPUOperand::ImmTy ImmTy) {
> +
> + SMLoc S = Parser.getTok().getLoc();
> + int64_t Offset = 0;
> +
> + AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset);
> + if (Res != MatchOperand_Success)
> + return Res;
> +
> + Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy));
> + return MatchOperand_Success;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
> + enum AMDGPUOperand::ImmTy ImmTy) {
> + int64_t Bit = 0;
> + SMLoc S = Parser.getTok().getLoc();
> +
> + // We are at the end of the statement, and this is a default argument, so
> + // use a default value.
> + if (getLexer().isNot(AsmToken::EndOfStatement)) {
> + switch(getLexer().getKind()) {
> + case AsmToken::Identifier: {
> + StringRef Tok = Parser.getTok().getString();
> + if (Tok == Name) {
> + Bit = 1;
> + Parser.Lex();
> + } else if (Tok.startswith("no") && Tok.endswith(Name)) {
> + Bit = 0;
> + Parser.Lex();
> + } else {
> + return MatchOperand_NoMatch;
> + }
> + break;
> + }
> + default:
> + return MatchOperand_NoMatch;
> + }
> + }
> +
> + Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
> + return MatchOperand_Success;
> +}
> +
> +static bool operandsHasOptionalOp(const OperandVector &Operands,
> + const OptionalOperand &OOp) {
> + for (unsigned i = 0; i < Operands.size(); i++) {
> + const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]);
> + if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) ||
> + (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name))
> + return true;
> +
> + }
> + return false;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
> + OperandVector &Operands) {
> + SMLoc S = Parser.getTok().getLoc();
> + for (const OptionalOperand &Op : OptionalOps) {
> + if (operandsHasOptionalOp(Operands, Op))
> + continue;
> + AMDGPUAsmParser::OperandMatchResultTy Res;
> + int64_t Value;
> + if (Op.IsBit) {
> + Res = parseNamedBit(Op.Name, Operands, Op.Type);
> + if (Res == MatchOperand_NoMatch)
> + continue;
> + return Res;
> + }
> +
> + Res = parseIntWithPrefix(Op.Name, Value, Op.Default);
> +
> + if (Res == MatchOperand_NoMatch)
> + continue;
> +
> + if (Res != MatchOperand_Success)
> + return Res;
> +
> + if (Op.ConvertResult && !Op.ConvertResult(Value)) {
> + return MatchOperand_ParseFail;
> + }
> +
> + Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type));
> + return MatchOperand_Success;
> + }
> + return MatchOperand_NoMatch;
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// ds
> +//===----------------------------------------------------------------------===//
> +
> +static const OptionalOperand DSOptionalOps [] = {
> + {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
> + {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
> +};
> +
> +static const OptionalOperand DSOptionalOpsOff01 [] = {
> + {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr},
> + {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr},
> + {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
> +};
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) {
> + return parseOptionalOps(DSOptionalOps, Operands);
> +}
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) {
> + return parseOptionalOps(DSOptionalOpsOff01, Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) {
> + SMLoc S = Parser.getTok().getLoc();
> + AMDGPUAsmParser::OperandMatchResultTy Res =
> + parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
> + if (Res == MatchOperand_NoMatch) {
> + Operands.push_back(AMDGPUOperand::CreateImm(0, S,
> + AMDGPUOperand::ImmTyOffset));
> + Res = MatchOperand_Success;
> + }
> + return Res;
> +}
> +
> +bool AMDGPUOperand::isDSOffset() const {
> + return isImm() && isUInt<16>(getImm());
> +}
> +
> +bool AMDGPUOperand::isDSOffset01() const {
> + return isImm() && isUInt<8>(getImm());
> +}
> +
> +void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
> + const OperandVector &Operands) {
> +
> + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> +
> + for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
> + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> + // Add the register arguments
> + if (Op.isReg()) {
> + Op.addRegOperands(Inst, 1);
> + continue;
> + }
> +
> + // Handle optional arguments
> + OptionalIdx[Op.getImmTy()] = i;
> + }
> +
> + unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0];
> + unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1];
> + unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
> +
> + ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0
> + ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1
> + ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
> + Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0
> +}
> +
> +void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
> +
> + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> + bool GDSOnly = false;
> +
> + for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
> + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> + // Add the register arguments
> + if (Op.isReg()) {
> + Op.addRegOperands(Inst, 1);
> + continue;
> + }
> +
> + if (Op.isToken() && Op.getToken() == "gds") {
> + GDSOnly = true;
> + continue;
> + }
> +
> + // Handle optional arguments
> + OptionalIdx[Op.getImmTy()] = i;
> + }
> +
> + unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
> + ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset
> +
> + if (!GDSOnly) {
> + unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
> + ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
> + }
> + Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0
> +}
> +
> +
> +//===----------------------------------------------------------------------===//
> // s_waitcnt
> //===----------------------------------------------------------------------===//
>
> @@ -284,6 +999,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
> // expcnt [6:4]
> // lgkmcnt [10:8]
> int64_t CntVal = 0x77f;
> + SMLoc S = Parser.getTok().getLoc();
>
> switch(getLexer().getKind()) {
> default: return MatchOperand_ParseFail;
> @@ -300,7 +1016,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
> } while(getLexer().isNot(AsmToken::EndOfStatement));
> break;
> }
> - Operands.push_back(AMDGPUOperand::CreateImm(CntVal));
> + Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
> return MatchOperand_Success;
> }
>
> @@ -308,6 +1024,245 @@ bool AMDGPUOperand::isSWaitCnt() const {
> return isImm();
> }
>
> +//===----------------------------------------------------------------------===//
> +// sopp branch targets
> +//===----------------------------------------------------------------------===//
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
> + SMLoc S = Parser.getTok().getLoc();
> +
> + switch (getLexer().getKind()) {
> + default: return MatchOperand_ParseFail;
> + case AsmToken::Integer: {
> + int64_t Imm;
> + if (getParser().parseAbsoluteExpression(Imm))
> + return MatchOperand_ParseFail;
> + Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
> + return MatchOperand_Success;
> + }
> +
> + case AsmToken::Identifier:
> + Operands.push_back(AMDGPUOperand::CreateExpr(
> + MCSymbolRefExpr::Create(getContext().GetOrCreateSymbol(
> + Parser.getTok().getString()), getContext()), S));
> + Parser.Lex();
> + return MatchOperand_Success;
> + }
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// mubuf
> +//===----------------------------------------------------------------------===//
> +
> +static const OptionalOperand MubufOptionalOps [] = {
> + {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
> + {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
> + {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr},
> + {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
> +};
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) {
> + return parseOptionalOps(MubufOptionalOps, Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseOffset(OperandVector &Operands) {
> + return parseIntWithPrefix("offset", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseGLC(OperandVector &Operands) {
> + return parseNamedBit("glc", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseSLC(OperandVector &Operands) {
> + return parseNamedBit("slc", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseTFE(OperandVector &Operands) {
> + return parseNamedBit("tfe", Operands);
> +}
> +
> +bool AMDGPUOperand::isMubufOffset() const {
> + return isImm() && isUInt<12>(getImm());
> +}
> +
> +void AMDGPUAsmParser::cvtMubuf(MCInst &Inst,
> + const OperandVector &Operands) {
> + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> +
> + for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
> + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> + // Add the register arguments
> + if (Op.isReg()) {
> + Op.addRegOperands(Inst, 1);
> + continue;
> + }
> +
> + // Handle the case where soffset is an immediate
> + if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
> + Op.addImmOperands(Inst, 1);
> + continue;
> + }
> +
> + // Handle tokens like 'offen' which are sometimes hard-coded into the
> + // asm string. There are no MCInst operands for these.
> + if (Op.isToken()) {
> + continue;
> + }
> + assert(Op.isImm());
> +
> + // Handle optional arguments
> + OptionalIdx[Op.getImmTy()] = i;
> + }
> +
> + assert(OptionalIdx.size() == 4);
> +
> + unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
> + unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC];
> + unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC];
> + unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE];
> +
> + ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1);
> + ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1);
> + ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1);
> + ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1);
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// mimg
> +//===----------------------------------------------------------------------===//
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseDMask(OperandVector &Operands) {
> + return parseIntWithPrefix("dmask", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseUNorm(OperandVector &Operands) {
> + return parseNamedBit("unorm", Operands);
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseR128(OperandVector &Operands) {
> + return parseNamedBit("r128", Operands);
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// vop3
> +//===----------------------------------------------------------------------===//
> +
> +static bool ConvertOmodMul(int64_t &Mul) {
> + if (Mul != 1 && Mul != 2 && Mul != 4)
> + return false;
> +
> + Mul >>= 1;
> + return true;
> +}
> +
> +static bool ConvertOmodDiv(int64_t &Div) {
> + if (Div == 1) {
> + Div = 0;
> + return true;
> + }
> +
> + if (Div == 2) {
> + Div = 3;
> + return true;
> + }
> +
> + return false;
> +}
> +
> +static const OptionalOperand VOP3OptionalOps [] = {
> + {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr},
> + {"mul", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul},
> + {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv},
> +};
> +
> +static bool isVOP3(OperandVector &Operands) {
> + if (operandsHaveModifiers(Operands))
> + return true;
> +
> + AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]);
> +
> + if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID))
> + return true;
> +
> + if (Operands.size() >= 5)
> + return true;
> +
> + if (Operands.size() > 3) {
> + AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]);
> + if (Src1Op.getReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) ||
> + Src1Op.isRegClass(AMDGPU::SReg_64RegClassID)))
> + return true;
> + }
> + return false;
> +}
> +
> +AMDGPUAsmParser::OperandMatchResultTy
> +AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
> +
> + // The value returned by this function may change after parsing
> + // an operand so store the original value here.
> + bool HasModifiers = operandsHaveModifiers(Operands);
> +
> + bool IsVOP3 = isVOP3(Operands);
> + if (HasModifiers || IsVOP3 ||
> + getLexer().isNot(AsmToken::EndOfStatement) ||
> + getForcedEncodingSize() == 64) {
> +
> + AMDGPUAsmParser::OperandMatchResultTy Res =
> + parseOptionalOps(VOP3OptionalOps, Operands);
> +
> + if (!HasModifiers && Res == MatchOperand_Success) {
> + // We have added a modifier operation, so we need to make sure all
> + // previous register operands have modifiers
> + for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
> + AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
> + if (Op.isReg())
> + Op.setModifiers(0);
> + }
> + }
> + return Res;
> + }
> + return MatchOperand_NoMatch;
> +}
> +
> +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
> + ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
> + unsigned i = 2;
> +
> + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
> +
> + if (operandsHaveModifiers(Operands)) {
> + for (unsigned e = Operands.size(); i != e; ++i) {
> + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
> +
> + if (Op.isRegWithInputMods()) {
> + ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2);
> + continue;
> + }
> + OptionalIdx[Op.getImmTy()] = i;
> + }
> +
> + unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp];
> + unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod];
> +
> + ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1);
> + ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1);
> + } else {
> + for (unsigned e = Operands.size(); i != e; ++i)
> + ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1);
> + }
> +}
> +
> /// Force static initialization.
> extern "C" void LLVMInitializeR600AsmParser() {
> RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> index c7f9da6..bdad818 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -127,7 +127,8 @@ void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
> O << " tfe";
> }
>
> -void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
> +void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
> + const MCRegisterInfo &MRI) {
> switch (reg) {
> case AMDGPU::VCC:
> O << "vcc";
> @@ -297,7 +298,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
> break;
>
> default:
> - printRegOperand(Op.getReg(), O);
> + printRegOperand(Op.getReg(), O, MRI);
> break;
> }
> } else if (Op.isImm()) {
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> index 5289718..b82e388 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> @@ -30,6 +30,8 @@ public:
> static const char *getRegisterName(unsigned RegNo);
>
> void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
> + static void printRegOperand(unsigned RegNo, raw_ostream &O,
> + const MCRegisterInfo &MRI);
>
> private:
> void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index af38c94..e2197fe 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -2082,3 +2082,38 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
> return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
> cast<RegisterSDNode>(VReg)->getReg(), VT);
> }
> +
> +//===----------------------------------------------------------------------===//
> +// SI Inline Assembly Support
> +//===----------------------------------------------------------------------===//
> +
> +std::pair<unsigned, const TargetRegisterClass *>
> +SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
> + const std::string &Constraint,
> + MVT VT) const {
> + if (Constraint == "r") {
> + switch(VT.SimpleTy) {
> + default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
> + case MVT::i64:
> + return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
> + case MVT::i32:
> + return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
> + }
> + }
> +
> + if (Constraint.size() > 1) {
> + const TargetRegisterClass *RC = nullptr;
> + if (Constraint[1] == 'v') {
> + RC = &AMDGPU::VGPR_32RegClass;
> + } else if (Constraint[1] == 's') {
> + RC = &AMDGPU::SGPR_32RegClass;
> + }
> +
> + if (RC) {
> + unsigned Idx = std::atoi(Constraint.substr(2).c_str());
> + if (Idx < RC->getNumRegs())
> + return std::make_pair(RC->getRegister(Idx), RC);
> + }
> + }
> + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
> +}
> diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
> index 92f5847..168de4c 100644
> --- a/lib/Target/R600/SIISelLowering.h
> +++ b/lib/Target/R600/SIISelLowering.h
> @@ -113,6 +113,10 @@ public:
> MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
> SDLoc DL,
> SDValue Ptr) const;
> +
> + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
> + const TargetRegisterInfo *TRI,
> + const std::string &Constraint, MVT VT) const;
> };
>
> } // End namespace llvm
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index e7a07a1..bc693c3 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -130,6 +130,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
> let AddedComplexity = -1000;
>
> let VOP3 = 1;
> + let VALU = 1;
> +
> + let AsmMatchConverter = "cvtVOP3";
> + let isCodeGenOnly = 0;
> +
> int Size = 8;
> }
>
> @@ -221,6 +226,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
> let mayLoad = 0;
> let mayStore = 0;
> let hasSideEffects = 0;
> + let isCodeGenOnly = 0;
> let SALU = 1;
> let SOP1 = 1;
> }
> @@ -231,6 +237,7 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
> let mayLoad = 0;
> let mayStore = 0;
> let hasSideEffects = 0;
> + let isCodeGenOnly = 0;
> let SALU = 1;
> let SOP2 = 1;
>
> @@ -246,6 +253,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let hasSideEffects = 0;
> let SALU = 1;
> let SOPC = 1;
> + let isCodeGenOnly = 0;
>
> let UseNamedOperandTable = 1;
> }
> @@ -563,10 +571,14 @@ let Uses = [EXEC] in {
>
> class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
> VOP1Common <outs, ins, asm, pattern>,
> - VOP1e<op>;
> + VOP1e<op> {
> + let isCodeGenOnly = 0;
> +}
>
> class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
> + VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
> + let isCodeGenOnly = 0;
> +}
>
> class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
> VOPCCommon <ins, asm, pattern>, VOPCe <op>;
> @@ -599,6 +611,7 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
> let mayStore = 1;
>
> let hasSideEffects = 0;
> + let AsmMatchConverter = "cvtDS";
> let SchedRW = [WriteLDS];
> }
>
> @@ -611,6 +624,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
>
> let hasSideEffects = 0;
> let UseNamedOperandTable = 1;
> + let AsmMatchConverter = "cvtMubuf";
> let SchedRW = [WriteVMEM];
> }
>
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 345e699..9091b19 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -6,6 +6,15 @@
> // License. See LICENSE.TXT for details.
> //
> //===----------------------------------------------------------------------===//
> +def isSICI : Predicate<
> + "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
> + "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
> +>, AssemblerPredicate<"FeatureGCN1Encoding">;
> +def isCI : Predicate<"Subtarget->getGeneration() "
> + ">= AMDGPUSubtarget::SEA_ISLANDS">;
> +def isVI : Predicate <
> + "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
> + AssemblerPredicate<"FeatureGCN3Encoding">;
>
> class vop {
> field bits<9> SI3;
> @@ -233,14 +242,88 @@ def FRAMEri32 : Operand<iPTR> {
> let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
> }
>
> +def SoppBrTarget : AsmOperandClass {
> + let Name = "SoppBrTarget";
> + let ParserMethod = "parseSOppBrTarget";
> +}
> +
> def sopp_brtarget : Operand<OtherVT> {
> let EncoderMethod = "getSOPPBrEncoding";
> let OperandType = "OPERAND_PCREL";
> + let ParserMatchClass = SoppBrTarget;
> }
>
> include "SIInstrFormats.td"
> include "VIInstrFormats.td"
>
> +def MubufOffsetMatchClass : AsmOperandClass {
> + let Name = "MubufOffset";
> + let ParserMethod = "parseMubufOptionalOps";
> + let RenderMethod = "addImmOperands";
> +}
> +
> +class DSOffsetBaseMatchClass <string parser> : AsmOperandClass {
> + let Name = "DSOffset"#parser;
> + let ParserMethod = parser;
> + let RenderMethod = "addImmOperands";
> + let PredicateMethod = "isDSOffset";
> +}
> +
> +def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">;
> +def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">;
> +
> +def DSOffset01MatchClass : AsmOperandClass {
> + let Name = "DSOffset1";
> + let ParserMethod = "parseDSOff01OptionalOps";
> + let RenderMethod = "addImmOperands";
> + let PredicateMethod = "isDSOffset01";
> +}
> +
> +class GDSBaseMatchClass <string parser> : AsmOperandClass {
> + let Name = "GDS"#parser;
> + let PredicateMethod = "isImm";
> + let ParserMethod = parser;
> + let RenderMethod = "addImmOperands";
> +}
> +
> +def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">;
> +def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
> +
> +def GLCMatchClass : AsmOperandClass {
> + let Name = "GLC";
> + let PredicateMethod = "isImm";
> + let ParserMethod = "parseMubufOptionalOps";
> + let RenderMethod = "addImmOperands";
> +}
> +
> +def SLCMatchClass : AsmOperandClass {
> + let Name = "SLC";
> + let PredicateMethod = "isImm";
> + let ParserMethod = "parseMubufOptionalOps";
> + let RenderMethod = "addImmOperands";
> +}
> +
> +def TFEMatchClass : AsmOperandClass {
> + let Name = "TFE";
> + let PredicateMethod = "isImm";
> + let ParserMethod = "parseMubufOptionalOps";
> + let RenderMethod = "addImmOperands";
> +}
> +
> +def OModMatchClass : AsmOperandClass {
> + let Name = "OMod";
> + let PredicateMethod = "isImm";
> + let ParserMethod = "parseVOP3OptionalOps";
> + let RenderMethod = "addImmOperands";
> +}
> +
> +def ClampMatchClass : AsmOperandClass {
> + let Name = "Clamp";
> + let PredicateMethod = "isImm";
> + let ParserMethod = "parseVOP3OptionalOps";
> + let RenderMethod = "addImmOperands";
> +}
> +
> let OperandType = "OPERAND_IMMEDIATE" in {
>
> def offen : Operand<i1> {
> @@ -254,35 +337,52 @@ def addr64 : Operand<i1> {
> }
> def mbuf_offset : Operand<i16> {
> let PrintMethod = "printMBUFOffset";
> + let ParserMatchClass = MubufOffsetMatchClass;
> }
> -def ds_offset : Operand<i16> {
> +class ds_offset_base <AsmOperandClass mc> : Operand<i16> {
> let PrintMethod = "printDSOffset";
> + let ParserMatchClass = mc;
> }
> +def ds_offset : ds_offset_base <DSOffsetMatchClass>;
> +def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>;
> +
> def ds_offset0 : Operand<i8> {
> let PrintMethod = "printDSOffset0";
> + let ParserMatchClass = DSOffset01MatchClass;
> }
> def ds_offset1 : Operand<i8> {
> let PrintMethod = "printDSOffset1";
> + let ParserMatchClass = DSOffset01MatchClass;
> }
> -def gds : Operand <i1> {
> +class gds_base <AsmOperandClass mc> : Operand <i1> {
> let PrintMethod = "printGDS";
> + let ParserMatchClass = mc;
> }
> +def gds : gds_base <GDSMatchClass>;
> +
> +def gds01 : gds_base <GDS01MatchClass>;
> +
> def glc : Operand <i1> {
> let PrintMethod = "printGLC";
> + let ParserMatchClass = GLCMatchClass;
> }
> def slc : Operand <i1> {
> let PrintMethod = "printSLC";
> + let ParserMatchClass = SLCMatchClass;
> }
> def tfe : Operand <i1> {
> let PrintMethod = "printTFE";
> + let ParserMatchClass = TFEMatchClass;
> }
>
> def omod : Operand <i32> {
> let PrintMethod = "printOModSI";
> + let ParserMatchClass = OModMatchClass;
> }
>
> def ClampMod : Operand <i1> {
> let PrintMethod = "printClampSI";
> + let ParserMatchClass = ClampMatchClass;
> }
>
> } // End OperandType = "OPERAND_IMMEDIATE"
> @@ -391,12 +491,18 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
> class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
> SOP1 <outs, ins, asm, []>,
> SOP1e <op.SI>,
> - SIMCInstr<opName, SISubtarget.SI>;
> + SIMCInstr<opName, SISubtarget.SI> {
> + let isCodeGenOnly = 0;
> + let AssemblerPredicates = [isSICI];
> +}
>
> class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
> SOP1 <outs, ins, asm, []>,
> SOP1e <op.VI>,
> - SIMCInstr<opName, SISubtarget.VI>;
> + SIMCInstr<opName, SISubtarget.VI> {
> + let isCodeGenOnly = 0;
> + let AssemblerPredicates = [isVI];
> +}
>
> multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm,
> list<dag> pattern> {
> @@ -472,12 +578,16 @@ class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
> class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
> SOP2<outs, ins, asm, []>,
> SOP2e<op.SI>,
> - SIMCInstr<opName, SISubtarget.SI>;
> + SIMCInstr<opName, SISubtarget.SI> {
> + let AssemblerPredicates = [isSICI];
> +}
>
> class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
> SOP2<outs, ins, asm, []>,
> SOP2e<op.VI>,
> - SIMCInstr<opName, SISubtarget.VI>;
> + SIMCInstr<opName, SISubtarget.VI> {
> + let AssemblerPredicates = [isVI];
> +}
>
> multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
> def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
> @@ -539,12 +649,18 @@ class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
> class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
> SOPK <outs, ins, asm, []>,
> SOPKe <op.SI>,
> - SIMCInstr<opName, SISubtarget.SI>;
> + SIMCInstr<opName, SISubtarget.SI> {
> + let AssemblerPredicates = [isSICI];
> + let isCodeGenOnly = 0;
> +}
>
> class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
> SOPK <outs, ins, asm, []>,
> SOPKe <op.VI>,
> - SIMCInstr<opName, SISubtarget.VI>;
> + SIMCInstr<opName, SISubtarget.VI> {
> + let AssemblerPredicates = [isVI];
> + let isCodeGenOnly = 0;
> +}
>
> multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm,
> string asm = opName#opAsm> {
> @@ -619,13 +735,17 @@ class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
> string asm> :
> SMRD <outs, ins, asm, []>,
> SMRDe <op, imm>,
> - SIMCInstr<opName, SISubtarget.SI>;
> + SIMCInstr<opName, SISubtarget.SI> {
> + let AssemblerPredicates = [isSICI];
> +}
>
> class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
> string asm> :
> SMRD <outs, ins, asm, []>,
> SMEMe_vi <op, imm>,
> - SIMCInstr<opName, SISubtarget.VI>;
> + SIMCInstr<opName, SISubtarget.VI> {
> + let AssemblerPredicates = [isVI];
> +}
>
> multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
> string asm, list<dag> pattern> {
> @@ -664,8 +784,14 @@ multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
> def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
> let PrintMethod = "printOperandAndMods";
> }
> +
> +def InputModsMatchClass : AsmOperandClass {
> + let Name = "RegWithInputMods";
> +}
> +
> def InputModsNoDefault : Operand <i32> {
> let PrintMethod = "printOperandAndMods";
> + let ParserMatchClass = InputModsMatchClass;
> }
>
> class getNumSrcArgs<ValueType Src1, ValueType Src2> {
> @@ -873,7 +999,8 @@ class AtomicNoRet <string noRetOp, bit isRet> {
> class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
> VOP1Common <outs, ins, "", pattern>,
> VOP <opName>,
> - SIMCInstr <opName#"_e32", SISubtarget.NONE> {
> + SIMCInstr <opName#"_e32", SISubtarget.NONE>,
> + MnemonicAlias<opName#"_e32", opName> {
> let isPseudo = 1;
> let isCodeGenOnly = 1;
>
> @@ -908,18 +1035,23 @@ multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
> class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
> VOP2Common <outs, ins, "", pattern>,
> VOP <opName>,
> - SIMCInstr<opName#"_e32", SISubtarget.NONE> {
> + SIMCInstr<opName#"_e32", SISubtarget.NONE>,
> + MnemonicAlias<opName#"_e32", opName> {
> let isPseudo = 1;
> let isCodeGenOnly = 1;
> }
>
> class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
> VOP2 <op.SI, outs, ins, opName#asm, []>,
> - SIMCInstr <opName#"_e32", SISubtarget.SI>;
> + SIMCInstr <opName#"_e32", SISubtarget.SI> {
> + let AssemblerPredicates = [isSICI];
> +}
>
> class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
> VOP2 <op.SI, outs, ins, opName#asm, []>,
> - SIMCInstr <opName#"_e32", SISubtarget.VI>;
> + SIMCInstr <opName#"_e32", SISubtarget.VI> {
> + let AssemblerPredicates = [isVI];
> +}
>
> multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
> string opName, string revOp> {
> @@ -965,7 +1097,8 @@ class VOP3DisableModFields <bit HasSrc0Mods,
> class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
> VOP3Common <outs, ins, "", pattern>,
> VOP <opName>,
> - SIMCInstr<opName#"_e64", SISubtarget.NONE> {
> + SIMCInstr<opName#"_e64", SISubtarget.NONE>,
> + MnemonicAlias<opName#"_e64", opName> {
> let isPseudo = 1;
> let isCodeGenOnly = 1;
> }
> @@ -973,22 +1106,30 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
> class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
> VOP3Common <outs, ins, asm, []>,
> VOP3e <op>,
> - SIMCInstr<opName#"_e64", SISubtarget.SI>;
> + SIMCInstr<opName#"_e64", SISubtarget.SI> {
> + let AssemblerPredicates = [isSICI];
> +}
>
> class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
> VOP3Common <outs, ins, asm, []>,
> VOP3e_vi <op>,
> - SIMCInstr <opName#"_e64", SISubtarget.VI>;
> + SIMCInstr <opName#"_e64", SISubtarget.VI> {
> + let AssemblerPredicates = [isVI];
> +}
>
> class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
> VOP3Common <outs, ins, asm, []>,
> VOP3be <op>,
> - SIMCInstr<opName#"_e64", SISubtarget.SI>;
> + SIMCInstr<opName#"_e64", SISubtarget.SI> {
> + let AssemblerPredicates = [isSICI];
> +}
>
> class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
> VOP3Common <outs, ins, asm, []>,
> VOP3be_vi <op>,
> - SIMCInstr <opName#"_e64", SISubtarget.VI>;
> + SIMCInstr <opName#"_e64", SISubtarget.VI> {
> + let AssemblerPredicates = [isVI];
> +}
>
> multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
> string opName, int NumSrcArgs, bit HasMods = 1> {
> @@ -1129,12 +1270,16 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
> }
>
> def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
> - SIMCInstr <opName, SISubtarget.SI>;
> + SIMCInstr <opName, SISubtarget.SI> {
> + let AssemblerPredicates = [isSICI];
> + }
>
> def _vi : VOP3Common <outs, ins, asm, []>,
> VOP3e_vi <op.VI3>,
> VOP3DisableFields <1, 0, 0>,
> - SIMCInstr <opName, SISubtarget.VI>;
> + SIMCInstr <opName, SISubtarget.VI> {
> + let AssemblerPredicates = [isVI];
> + }
> }
>
> multiclass VOP1_Helper <vop1 op, string opName, dag outs,
> @@ -1287,7 +1432,8 @@ let isCodeGenOnly = 0 in {
> class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
> VOPCCommon <ins, "", pattern>,
> VOP <opName>,
> - SIMCInstr<opName#"_e32", SISubtarget.NONE> {
> + SIMCInstr<opName#"_e32", SISubtarget.NONE>,
> + MnemonicAlias<opName#"_e32", opName> {
> let isPseudo = 1;
> let isCodeGenOnly = 1;
> }
> @@ -1534,7 +1680,9 @@ class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
> class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
> DS <outs, ins, asm, []>,
> DSe <op>,
> - SIMCInstr <opName, SISubtarget.SI>;
> + SIMCInstr <opName, SISubtarget.SI> {
> + let isCodeGenOnly = 0;
> +}
>
> class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
> DS <outs, ins, asm, []>,
> @@ -1548,6 +1696,7 @@ class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm
> bits<16> offset;
> let offset0 = offset{7-0};
> let offset1 = offset{15-8};
> + let isCodeGenOnly = 0;
> }
>
> class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
> @@ -1575,12 +1724,12 @@ multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
> multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
> dag outs = (outs rc:$vdst),
> dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
> - gds:$gds, M0Reg:$m0),
> + gds01:$gds, M0Reg:$m0),
> string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
>
> def "" : DS_Pseudo <opName, outs, ins, []>;
>
> - let data0 = 0, data1 = 0 in {
> + let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in {
> def _si : DS_Real_si <op, opName, outs, ins, asm>;
> def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
> }
> @@ -1604,12 +1753,12 @@ multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
> multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
> dag outs = (outs),
> dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
> - ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0),
> + ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds, M0Reg:$m0),
> string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
>
> def "" : DS_Pseudo <opName, outs, ins, []>;
>
> - let vdst = 0 in {
> + let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in {
> def _si : DS_Real_si <op, opName, outs, ins, asm>;
> def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
> }
> @@ -1683,7 +1832,7 @@ multiclass DS_0A_RET <bits<8> op, string opName,
>
> multiclass DS_1A_RET_GDS <bits<8> op, string opName,
> dag outs = (outs VGPR_32:$vdst),
> - dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
> + dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset, M0Reg:$m0),
> string asm = opName#" $vdst, $addr"#"$offset gds"> {
>
> def "" : DS_Pseudo <opName, outs, ins, []>;
> @@ -1792,6 +1941,20 @@ class mubuf <bits<7> si, bits<7> vi = si> {
> field bits<7> VI = vi;
> }
>
> +let isCodeGenOnly = 0 in {
> +
> +class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
> + let lds = 0;
> +}
> +
> +} // End let isCodeGenOnly = 0
> +
> +class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
> + let lds = 0;
> +}
> +
> class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
> bit IsAddr64 = is_addr64;
> string OpName = NAME # suffix;
> @@ -1835,7 +1998,7 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
> def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
> MUBUFAddr64Table <0>;
>
> - let addr64 = 0 in {
> + let addr64 = 0, isCodeGenOnly = 0 in {
> def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
> }
>
> @@ -1848,7 +2011,7 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
> def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
> MUBUFAddr64Table <1>;
>
> - let addr64 = 1 in {
> + let addr64 = 1, isCodeGenOnly = 0 in {
> def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
> }
>
> @@ -1856,11 +2019,6 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
> // for VI appropriately.
> }
>
> -class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
> - let lds = 0;
> -}
> -
> multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins,
> string asm, list<dag> pattern, bit is_return> {
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index d6e4986..7c74eb6 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -27,16 +27,8 @@ def SendMsgImm : Operand<i32> {
> }
>
> def isGCN : Predicate<"Subtarget->getGeneration() "
> - ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
> -def isSICI : Predicate<
> - "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
> - "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
> ->;
> -def isCI : Predicate<"Subtarget->getGeneration() "
> - ">= AMDGPUSubtarget::SEA_ISLANDS">;
> -def isVI : Predicate <
> - "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS"
> ->;
> + ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
> + AssemblerPredicate<"FeatureGCN">;
>
> def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
>
> @@ -240,9 +232,9 @@ defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
> >;
> } // End Defs = [SCC]
>
> -defm S_CSELECT_B32 : SOP2_SELECT_32 <sop2<0x0a>, "s_cselect_b32", []>;
>
> let Uses = [SCC] in {
> + defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>;
> defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>;
> } // End Uses = [SCC]
>
> @@ -1663,7 +1655,6 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
> VOP_F32_F32_I32, AMDGPUldexp
> >;
>
> -
> defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
> VOP_I32_F32_I32>; // TODO: set "Uses = dst"
>
> diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
> index 7bb5dc2..f289014 100644
> --- a/lib/Target/R600/SIRegisterInfo.td
> +++ b/lib/Target/R600/SIRegisterInfo.td
> @@ -66,7 +66,7 @@ foreach Index = 0-255 in {
> //===----------------------------------------------------------------------===//
>
> // SGPR 32-bit registers
> -def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
> +def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
> (add (sequence "SGPR%u", 0, 101))>;
>
> // SGPR 64-bit registers
> @@ -113,7 +113,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
> (add (decimate (shl SGPR_32, 15), 4))]>;
>
> // VGPR 32-bit registers
> -def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
> +def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
> (add (sequence "VGPR%u", 0, 255))>;
>
> // VGPR 64-bit registers
> @@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
> // Register classes used as source and destination
> //===----------------------------------------------------------------------===//
>
> +class RegImmMatcher<string name> : AsmOperandClass {
> + let Name = name;
> + let RenderMethod = "addRegOrImmOperands";
> +}
> +
> // Special register classes for predicates and the M0 register
> def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
> let CopyCost = -1; // Theoretically it is possible to read from SCC,
> @@ -180,7 +185,7 @@ def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
> def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
>
> // Register class for all scalar registers (SGPRs + Special Registers)
> -def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
> +def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
> (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
> >;
>
> @@ -227,15 +232,21 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
> // SSrc_* Operands with an SGPR or a 32-bit immediate
> //===----------------------------------------------------------------------===//
>
> -def SSrc_32 : RegImmOperand<SReg_32>;
> +def SSrc_32 : RegImmOperand<SReg_32> {
> + let ParserMatchClass = RegImmMatcher<"SSrc32">;
> +}
>
> -def SSrc_64 : RegImmOperand<SReg_64>;
> +def SSrc_64 : RegImmOperand<SReg_64> {
> + let ParserMatchClass = RegImmMatcher<"SSrc64">;
> +}
>
> //===----------------------------------------------------------------------===//
> // SCSrc_* Operands with an SGPR or a inline constant
> //===----------------------------------------------------------------------===//
>
> -def SCSrc_32 : RegInlineOperand<SReg_32>;
> +def SCSrc_32 : RegInlineOperand<SReg_32> {
> + let ParserMatchClass = RegImmMatcher<"SCSrc32">;
> +}
>
> //===----------------------------------------------------------------------===//
> // VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
> @@ -245,14 +256,30 @@ def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
>
> def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
>
> -def VSrc_32 : RegImmOperand<VS_32>;
> +def VSrc_32 : RegisterOperand<VS_32> {
> + let OperandNamespace = "AMDGPU";
> + let OperandType = "OPERAND_REG_IMM32";
> + let ParserMatchClass = RegImmMatcher<"VSrc32">;
> +}
>
> -def VSrc_64 : RegImmOperand<VS_64>;
> +def VSrc_64 : RegisterOperand<VS_64> {
> + let OperandNamespace = "AMDGPU";
> + let OperandType = "OPERAND_REG_IMM32";
> + let ParserMatchClass = RegImmMatcher<"VSrc64">;
> +}
>
> //===----------------------------------------------------------------------===//
> // VCSrc_* Operands with an SGPR, VGPR or an inline constant
> //===----------------------------------------------------------------------===//
>
> -def VCSrc_32 : RegInlineOperand<VS_32>;
> +def VCSrc_32 : RegisterOperand<VS_32> {
> + let OperandNamespace = "AMDGPU";
> + let OperandType = "OPERAND_REG_INLINE_C";
> + let ParserMatchClass = RegImmMatcher<"VCSrc32">;
> +}
>
> -def VCSrc_64 : RegInlineOperand<VS_64>;
> +def VCSrc_64 : RegisterOperand<VS_64> {
> + let OperandNamespace = "AMDGPU";
> + let OperandType = "OPERAND_REG_INLINE_C";
> + let ParserMatchClass = RegImmMatcher<"VCSrc64">;
> +}
> diff --git a/test/MC/R600/ds-err.s b/test/MC/R600/ds-err.s
> new file mode 100644
> index 0000000..52c2740
> --- /dev/null
> +++ b/test/MC/R600/ds-err.s
> @@ -0,0 +1,23 @@
> +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
> +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
> +
> +// offset too big
> +// CHECK: invalid operand for instruction
> +ds_add_u32 v2, v4 offset:1000000000
> +
> +// offset0 twice
> +// CHECK: error: not a valid operand.
> +ds_write2_b32 v2, v4, v6 offset0:4 offset0:8
> +
> +// offset1 twice
> +// CHECK: error: not a valid operand.
> +ds_write2_b32 v2, v4, v6 offset1:4 offset1:8
> +
> +// offset0 too big
> +// CHECK: invalid operand for instruction
> +ds_write2_b32 v2, v4, v6 offset0:1000000000
> +
> +// offset1 too big
> +// CHECK: invalid operand for instruction
> +ds_write2_b32 v2, v4, v6 offset1:1000000000
> +
> diff --git a/test/MC/R600/ds.s b/test/MC/R600/ds.s
> new file mode 100644
> index 0000000..ad63229
> --- /dev/null
> +++ b/test/MC/R600/ds.s
> @@ -0,0 +1,337 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Checks for 16-bit Offsets
> +//===----------------------------------------------------------------------===//
> +
> +ds_add_u32 v2, v4 offset:16
> +// CHECK: ds_add_u32 v2, v4 offset:16 ; encoding: [0x10,0x00,0x00,0xd8,0x02,0x04,0x00,0x00]
> +
> +//===----------------------------------------------------------------------===//
> +// Checks for 2 8-bit Offsets
> +//===----------------------------------------------------------------------===//
> +
> +ds_write2_b32 v2, v4, v6 offset0:4
> +// CHECK: ds_write2_b32 v2, v4, v6 offset0:4 ; encoding: [0x04,0x00,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write2_b32 v2, v4, v6 offset0:4 offset1:8
> +// CHECK: ds_write2_b32 v2, v4, v6 offset0:4 offset1:8 ; encoding: [0x04,0x08,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write2_b32 v2, v4, v6 offset1:8
> +// CHECK: ds_write2_b32 v2, v4, v6 offset1:8 ; encoding: [0x00,0x08,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_read2_b32 v[8:9], v2 offset0:4
> +// CHECK: ds_read2_b32 v[8:9], v2 offset0:4 ; encoding: [0x04,0x00,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b32 v[8:9], v2 offset0:4 offset1:8
> +// CHECK: ds_read2_b32 v[8:9], v2 offset0:4 offset1:8 ; encoding: [0x04,0x08,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b32 v[8:9], v2 offset1:8
> +// CHECK: ds_read2_b32 v[8:9], v2 offset1:8 ; encoding: [0x00,0x08,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +ds_add_u32 v2, v4
> +// CHECK: ds_add_u32 v2, v4 ; encoding: [0x00,0x00,0x00,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_sub_u32 v2, v4
> +// CHECK: ds_sub_u32 v2, v4 ; encoding: [0x00,0x00,0x04,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_rsub_u32 v2, v4
> +// CHECK: ds_rsub_u32 v2, v4 ; encoding: [0x00,0x00,0x08,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_inc_u32 v2, v4
> +// CHECK: ds_inc_u32 v2, v4 ; encoding: [0x00,0x00,0x0c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_dec_u32 v2, v4
> +// CHECK: ds_dec_u32 v2, v4 ; encoding: [0x00,0x00,0x10,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_min_i32 v2, v4
> +// CHECK: ds_min_i32 v2, v4 ; encoding: [0x00,0x00,0x14,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_max_i32 v2, v4
> +// CHECK: ds_max_i32 v2, v4 ; encoding: [0x00,0x00,0x18,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_min_u32 v2, v4
> +// CHECK: ds_min_u32 v2, v4 ; encoding: [0x00,0x00,0x1c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_max_u32 v2, v4
> +// CHECK: ds_max_u32 v2, v4 ; encoding: [0x00,0x00,0x20,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_and_b32 v2, v4
> +// CHECK: ds_and_b32 v2, v4 ; encoding: [0x00,0x00,0x24,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_or_b32 v2, v4
> +// CHECK: ds_or_b32 v2, v4 ; encoding: [0x00,0x00,0x28,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_xor_b32 v2, v4
> +// CHECK: ds_xor_b32 v2, v4 ; encoding: [0x00,0x00,0x2c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_mskor_b32 v2, v4, v6
> +// CHECK: ds_mskor_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x30,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write_b32 v2, v4
> +// CHECK: ds_write_b32 v2, v4 ; encoding: [0x00,0x00,0x34,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_write2_b32 v2, v4, v6
> +// CHECK: ds_write2_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x38,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_write2st64_b32 v2, v4, v6
> +// CHECK: ds_write2st64_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x3c,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_b32 v2, v4, v6
> +// CHECK: ds_cmpst_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x40,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_f32 v2, v4, v6
> +// CHECK: ds_cmpst_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x44,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_min_f32 v2, v4, v6
> +// CHECK: ds_min_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x48,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_max_f32 v2, v4, v6
> +// CHECK: ds_max_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x4c,0xd8,0x02,0x04,0x06,0x00]
> +
> +ds_gws_init v2 gds
> +// CHECK: ds_gws_init v2 gds ; encoding: [0x00,0x00,0x66,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_sema_v v2 gds
> +// CHECK: ds_gws_sema_v v2 gds ; encoding: [0x00,0x00,0x6a,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_sema_br v2 gds
> +// CHECK: ds_gws_sema_br v2 gds ; encoding: [0x00,0x00,0x6e,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_sema_p v2 gds
> +// CHECK: ds_gws_sema_p v2 gds ; encoding: [0x00,0x00,0x72,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_gws_barrier v2 gds
> +// CHECK: ds_gws_barrier v2 gds ; encoding: [0x00,0x00,0x76,0xd8,0x02,0x00,0x00,0x00]
> +
> +ds_write_b8 v2, v4
> +// CHECK: ds_write_b8 v2, v4 ; encoding: [0x00,0x00,0x78,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_write_b16 v2, v4
> +// CHECK: ds_write_b16 v2, v4 ; encoding: [0x00,0x00,0x7c,0xd8,0x02,0x04,0x00,0x00]
> +
> +ds_add_rtn_u32 v8, v2, v4
> +// CHECK: ds_add_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x80,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_sub_rtn_u32 v8, v2, v4
> +// CHECK: ds_sub_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x84,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_rsub_rtn_u32 v8, v2, v4
> +// CHECK: ds_rsub_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x88,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_inc_rtn_u32 v8, v2, v4
> +// CHECK: ds_inc_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x8c,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_dec_rtn_u32 v8, v2, v4
> +// CHECK: ds_dec_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x90,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_i32 v8, v2, v4
> +// CHECK: ds_min_rtn_i32 v8, v2, v4 ; encoding: [0x00,0x00,0x94,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_i32 v8, v2, v4
> +// CHECK: ds_max_rtn_i32 v8, v2, v4 ; encoding: [0x00,0x00,0x98,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_u32 v8, v2, v4
> +// CHECK: ds_min_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x9c,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_u32 v8, v2, v4
> +// CHECK: ds_max_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0xa0,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_and_rtn_b32 v8, v2, v4
> +// CHECK: ds_and_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xa4,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_or_rtn_b32 v8, v2, v4
> +// CHECK: ds_or_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xa8,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_xor_rtn_b32 v8, v2, v4
> +// CHECK: ds_xor_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xac,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_mskor_rtn_b32 v8, v2, v4, v6
> +// CHECK: ds_mskor_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xb0,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg_rtn_b32 v8, v2, v4
> +// CHECK: ds_wrxchg_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xb4,0xd8,0x02,0x04,0x00,0x08]
> +
> +ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6
> +// CHECK: ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xb8,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg2st64_rtn_b32 v[8:9] v2, v4, v6
> +// CHECK: ds_wrxchg2st64_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xbc,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_b32 v8, v2, v4, v6
> +// CHECK: ds_cmpst_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc0,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_f32 v8, v2, v4, v6
> +// CHECK: ds_cmpst_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc4,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_min_rtn_f32 v8, v2, v4, v6
> +// CHECK: ds_min_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc8,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_max_rtn_f32 v8, v2, v4, v6
> +// CHECK: ds_max_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xcc,0xd8,0x02,0x04,0x06,0x08]
> +
> +ds_swizzle_b32 v8, v2
> +// CHECK: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_b32 v8, v2
> +// CHECK: ds_read_b32 v8, v2 ; encoding: [0x00,0x00,0xd8,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b32 v[8:9], v2
> +// CHECK: ds_read2_b32 v[8:9], v2 ; encoding: [0x00,0x00,0xdc,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read2st64_b32 v[8:9], v2
> +// CHECK: ds_read2st64_b32 v[8:9], v2 ; encoding: [0x00,0x00,0xe0,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_i8 v8, v2
> +// CHECK: ds_read_i8 v8, v2 ; encoding: [0x00,0x00,0xe4,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_u8 v8, v2
> +// CHECK: ds_read_u8 v8, v2 ; encoding: [0x00,0x00,0xe8,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_i16 v8, v2
> +// CHECK: ds_read_i16 v8, v2 ; encoding: [0x00,0x00,0xec,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_read_u16 v8, v2
> +// CHECK: ds_read_u16 v8, v2 ; encoding: [0x00,0x00,0xf0,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_consume v8
> +// CHECK: ds_consume v8 ; encoding: [0x00,0x00,0xf4,0xd8,0x00,0x00,0x00,0x08]
> +
> +ds_append v8
> +// CHECK: ds_append v8 ; encoding: [0x00,0x00,0xf8,0xd8,0x00,0x00,0x00,0x08]
> +
> +ds_ordered_count v8, v2 gds
> +// CHECK: ds_ordered_count v8, v2 gds ; encoding: [0x00,0x00,0xfe,0xd8,0x02,0x00,0x00,0x08]
> +
> +ds_add_u64 v2, v[4:5]
> +// CHECK: ds_add_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x00,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_sub_u64 v2, v[4:5]
> +// CHECK: ds_sub_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x04,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_rsub_u64 v2, v[4:5]
> +// CHECK: ds_rsub_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x08,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_inc_u64 v2, v[4:5]
> +// CHECK: ds_inc_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x0c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_dec_u64 v2, v[4:5]
> +// CHECK: ds_dec_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x10,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_min_i64 v2, v[4:5]
> +// CHECK: ds_min_i64 v2, v[4:5] ; encoding: [0x00,0x00,0x14,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_max_i64 v2, v[4:5]
> +// CHECK: ds_max_i64 v2, v[4:5] ; encoding: [0x00,0x00,0x18,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_min_u64 v2, v[4:5]
> +// CHECK: ds_min_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x1c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_max_u64 v2, v[4:5]
> +// CHECK: ds_max_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x20,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_and_b64 v2, v[4:5]
> +// CHECK: ds_and_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x24,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_or_b64 v2, v[4:5]
> +// CHECK: ds_or_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x28,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_xor_b64 v2, v[4:5]
> +// CHECK: ds_xor_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x2c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_mskor_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_mskor_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x30,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_write_b64 v2, v[4:5]
> +// CHECK: ds_write_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x34,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_write2_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_write2_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x38,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_write2st64_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_write2st64_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x3c,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_b64 v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x40,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_cmpst_f64 v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_f64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x44,0xd9,0x02,0x04,0x06,0x00]
> +
> +ds_min_f64 v2, v[4:5]
> +// CHECK: ds_min_f64 v2, v[4:5] ; encoding: [0x00,0x00,0x48,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_max_f64 v2, v[4:5]
> +// CHECK: ds_max_f64 v2, v[4:5] ; encoding: [0x00,0x00,0x4c,0xd9,0x02,0x04,0x00,0x00]
> +
> +ds_add_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_add_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x80,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_sub_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_sub_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x84,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_rsub_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_rsub_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x88,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_inc_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_inc_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x8c,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_dec_rtn_u64 v[8:9] v2, v[4:5]
> +// CHECK: ds_dec_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x90,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_i64 v[8:9], v2, v[4:5]
> +// CHECK: ds_min_rtn_i64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x94,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_i64 v[8:9], v2, v[4:5]
> +// CHECK: ds_max_rtn_i64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x98,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_min_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_min_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x9c,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_u64 v[8:9], v2, v[4:5]
> +// CHECK: ds_max_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa0,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_and_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_and_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa4,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_or_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_or_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa8,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_xor_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_xor_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xac,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_mskor_rtn_b64 v[8:9], v2, v[4:5], v[6:7]
> +// CHECK: ds_mskor_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xb0,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg_rtn_b64 v[8:9], v2, v[4:5]
> +// CHECK: ds_wrxchg_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xb4,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
> +// CHECK: ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xb8,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
> +// CHECK: ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xbc,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xc0,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_cmpst_rtn_f64 v[8:9], v2, v[4:5], v[6:7]
> +// CHECK: ds_cmpst_rtn_f64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xc4,0xd9,0x02,0x04,0x06,0x08]
> +
> +ds_min_rtn_f64 v[8:9], v2, v[4:5]
> +// CHECK: ds_min_rtn_f64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xc8,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_max_rtn_f64 v[8:9], v2, v[4:5]
> +// CHECK: ds_max_rtn_f64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xcc,0xd9,0x02,0x04,0x00,0x08]
> +
> +ds_read_b64 v[8:9], v2
> +// CHECK: ds_read_b64 v[8:9], v2 ; encoding: [0x00,0x00,0xd8,0xd9,0x02,0x00,0x00,0x08]
> +
> +ds_read2_b64 v[8:11], v2
> +// CHECK: ds_read2_b64 v[8:11], v2 ; encoding: [0x00,0x00,0xdc,0xd9,0x02,0x00,0x00,0x08]
> +
> +ds_read2st64_b64 v[8:11], v2
> +// CHECK: ds_read2st64_b64 v[8:11], v2 ; encoding: [0x00,0x00,0xe0,0xd9,0x02,0x00,0x00,0x08]
> diff --git a/test/MC/R600/mubuf.s b/test/MC/R600/mubuf.s
> new file mode 100644
> index 0000000..78d365a
> --- /dev/null
> +++ b/test/MC/R600/mubuf.s
> @@ -0,0 +1,352 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Test for different operand combinations
> +//===----------------------------------------------------------------------===//
> +
> +//===----------------------------------------------------------------------===//
> +// load - immediate offset only
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, s[4:7], s1
> +// CHECK: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 glc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 slc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 tfe
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 tfe glc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x30,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1 glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - vgpr offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 tfe
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen tfe glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 offen glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - vgpr index
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 tfe
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen tfe glc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v2, s[4:7], s1 idxen glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - vgpr index and offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen tfe glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// load - addr64
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 tfe glc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x30,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc tfe slc
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe slc offset:4
> +// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - immediate offset only
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, s[4:7], s1
> +// CHECK: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 glc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 slc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 tfe
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 tfe glc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1 glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - vgpr offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 tfe
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen tfe glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 offen glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - vgpr index
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 tfe
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen tfe glc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v2, s[4:7], s1 idxen glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - vgpr index and offset
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen tfe glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// store - addr64
> +//===----------------------------------------------------------------------===//
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x41,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 tfe glc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x70,0xe0,0x02,0x01,0x81,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc tfe slc
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe slc offset:4
> +// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +buffer_load_format_x v1, s[4:7], s1
> +// CHECK: buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_format_xy v[1:2], s[4:7], s1
> +// CHECK: buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_format_xyz v[1:3], s[4:7], s1
> +// CHECK: buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_format_xyzw v[1:4], s[4:7], s1
> +// CHECK: buffer_load_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_x v1, s[4:7], s1
> +// CHECK: buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_xy v[1:2], s[4:7], s1
> +// CHECK: buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_xyz v[1:3], s[4:7], s1
> +// CHECK: buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_format_xyzw v[1:4], s[4:7], s1
> +// CHECK: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_ubyte v1, s[4:7], s1
> +// CHECK: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_sbyte v1, s[4:7], s1
> +// CHECK: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_ushort v1, s[4:7], s1
> +// CHECK: buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_sshort v1, s[4:7], s1
> +// CHECK: buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dword v1, s[4:7], s1
> +// CHECK: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dwordx2 v[1:2], s[4:7], s1
> +// CHECK: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_load_dwordx4 v[1:4], s[4:7], s1
> +// CHECK: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_byte v1, s[4:7], s1
> +// CHECK: buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_short v1, s[4:7], s1
> +// CHECK: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dword v1, s[4:7], s1
> +// CHECK: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dwordx2 v[1:2], s[4:7], s1
> +// CHECK: buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
> +
> +buffer_store_dwordx4 v[1:4], s[4:7], s1
> +// CHECK: buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
> +
> +// TODO: Atomics
> diff --git a/test/MC/R600/smrd.s b/test/MC/R600/smrd.s
> new file mode 100644
> index 0000000..b67abf7
> --- /dev/null
> +++ b/test/MC/R600/smrd.s
> @@ -0,0 +1,32 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +s_load_dword s1, s[2:3], 1
> +// CHECK: s_load_dword s1, s[2:3], 0x1 ; encoding: [0x01,0x83,0x00,0xc0]
> +
> +s_load_dword s1, s[2:3], s4
> +// CHECK: s_load_dword s1, s[2:3], s4 ; encoding: [0x04,0x82,0x00,0xc0]
> +
> +s_load_dwordx2 s[2:3], s[2:3], 1
> +// CHECK: s_load_dwordx2 s[2:3], s[2:3], 0x1 ; encoding: [0x01,0x03,0x41,0xc0]
> +
> +s_load_dwordx2 s[2:3], s[2:3], s4
> +// CHECK: s_load_dwordx2 s[2:3], s[2:3], s4 ; encoding: [0x04,0x02,0x41,0xc0]
> +
> +s_load_dwordx4 s[4:7], s[2:3], 1
> +// CHECK: s_load_dwordx4 s[4:7], s[2:3], 0x1 ; encoding: [0x01,0x03,0x82,0xc0]
> +
> +s_load_dwordx4 s[4:7], s[2:3], s4
> +// CHECK: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x04,0x02,0x82,0xc0]
> +
> +s_load_dwordx8 s[8:15], s[2:3], 1
> +// CHECK: s_load_dwordx8 s[8:15], s[2:3], 0x1 ; encoding: [0x01,0x03,0xc4,0xc0]
> +
> +s_load_dwordx8 s[8:15], s[2:3], s4
> +// CHECK: s_load_dwordx8 s[8:15], s[2:3], s4 ; encoding: [0x04,0x02,0xc4,0xc0]
> +
> +s_load_dwordx16 s[16:31], s[2:3], 1
> +// CHECK: s_load_dwordx16 s[16:31], s[2:3], 0x1 ; encoding: [0x01,0x03,0x08,0xc1]
> +
> +s_load_dwordx16 s[16:31], s[2:3], s4
> +// CHECK: s_load_dwordx16 s[16:31], s[2:3], s4 ; encoding: [0x04,0x02,0x08,0xc1]
> diff --git a/test/MC/R600/sop1-err.s b/test/MC/R600/sop1-err.s
> new file mode 100644
> index 0000000..f892356
> --- /dev/null
> +++ b/test/MC/R600/sop1-err.s
> @@ -0,0 +1,37 @@
> +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
> +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
> +
> +s_mov_b32 v1, s2
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s1, v0
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s[1:2], s0
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s0, s[1:2]
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s220, s0
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s0, s220
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b64 s1, s[0:1]
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b64 s[0:1], s1
> +// CHECK: error: invalid operand for instruction
> +
> +// Immediate greater than 32-bits
> +s_mov_b32 s1, 0xfffffffff
> +// CHECK: error: invalid immediate: only 32-bit values are legal
> +
> +// Immediate greater than 32-bits
> +s_mov_b64 s[0:1], 0xfffffffff
> +// CHECK: error: invalid immediate: only 32-bit values are legal
> +
> +// Out of range register
> +s_mov_b32 s
> diff --git a/test/MC/R600/sop1.s b/test/MC/R600/sop1.s
> new file mode 100644
> index 0000000..92ca73f
> --- /dev/null
> +++ b/test/MC/R600/sop1.s
> @@ -0,0 +1,177 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +s_mov_b32 s1, s2
> +// CHECK: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe]
> +
> +s_mov_b32 s1, 1
> +// CHECK: s_mov_b32 s1, 1 ; encoding: [0x81,0x03,0x81,0xbe]
> +
> +s_mov_b32 s1, 100
> +// CHECK: s_mov_b32 s1, 0x64 ; encoding: [0xff,0x03,0x81,0xbe,0x64,0x00,0x00,0x00]
> +
> +s_mov_b64 s[2:3], s[4:5]
> +// CHECK: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe]
> +
> +s_mov_b64 s[2:3], 0xffffffffffffffff
> +// CHECK: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe]
> +
> +s_cmov_b32 s1, 200
> +// CHECK: s_cmov_b32 s1, 0xc8 ; encoding: [0xff,0x05,0x81,0xbe,0xc8,0x00,0x00,0x00]
> +
> +s_cmov_b32 s1, 1.0
> +// CHECK: s_cmov_b32 s1, 1.0 ; encoding: [0xf2,0x05,0x81,0xbe]
> +
> +//s_cmov_b64 s[2:3], 1.0
> +//CHECK-FIXME: s_cmov_b64 s[2:3], 1.0 ; encoding: [0xf2,0x05,0x82,0xb3]
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +s_mov_b32 s1, s2
> +// CHECK: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe]
> +
> +s_mov_b64 s[2:3], s[4:5]
> +// CHECK: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe]
> +
> +s_cmov_b32 s1, s2
> +// CHECK: s_cmov_b32 s1, s2 ; encoding: [0x02,0x05,0x81,0xbe]
> +
> +s_cmov_b64 s[2:3], s[4:5]
> +// CHECK: s_cmov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x06,0x82,0xbe]
> +
> +s_not_b32 s1, s2
> +// CHECK: s_not_b32 s1, s2 ; encoding: [0x02,0x07,0x81,0xbe]
> +
> +s_not_b64 s[2:3], s[4:5]
> +// CHECK: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x08,0x82,0xbe]
> +
> +s_wqm_b32 s1, s2
> +// CHECK: s_wqm_b32 s1, s2 ; encoding: [0x02,0x09,0x81,0xbe]
> +
> +s_wqm_b64 s[2:3], s[4:5]
> +// CHECK: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0a,0x82,0xbe]
> +
> +s_brev_b32 s1, s2
> +// CHECK: s_brev_b32 s1, s2 ; encoding: [0x02,0x0b,0x81,0xbe]
> +
> +s_brev_b64 s[2:3], s[4:5]
> +// CHECK: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0c,0x82,0xbe]
> +
> +s_bcnt0_i32_b32 s1, s2
> +// CHECK: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0d,0x81,0xbe]
> +
> +s_bcnt0_i32_b64 s1, s[2:3]
> +// CHECK: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0e,0x81,0xbe]
> +
> +s_bcnt1_i32_b32 s1, s2
> +// CHECK: s_bcnt1_i32_b32 s1, s2 ; encoding: [0x02,0x0f,0x81,0xbe]
> +
> +s_bcnt1_i32_b64 s1, s[2:3]
> +// CHECK: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x10,0x81,0xbe]
> +
> +s_ff0_i32_b32 s1, s2
> +// CHECK: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x11,0x81,0xbe]
> +
> +s_ff0_i32_b64 s1, s[2:3]
> +// CHECK: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x12,0x81,0xbe]
> +
> +s_ff1_i32_b32 s1, s2
> +// CHECK: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x13,0x81,0xbe]
> +
> +s_ff1_i32_b64 s1, s[2:3]
> +// CHECK: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x14,0x81,0xbe]
> +
> +s_flbit_i32_b32 s1, s2
> +// CHECK: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x15,0x81,0xbe]
> +
> +s_flbit_i32_b64 s1, s[2:3]
> +// CHECK: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x16,0x81,0xbe]
> +
> +s_flbit_i32 s1, s2
> +// CHECK: s_flbit_i32 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe]
> +
> +s_flbit_i32_i64 s1, s[2:3]
> +// CHECK: s_flbit_i32_i64 s1, s[2:3] ; encoding: [0x02,0x18,0x81,0xbe]
> +
> +s_sext_i32_i8 s1, s2
> +// CHECK: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x19,0x81,0xbe]
> +
> +s_sext_i32_i16 s1, s2
> +// CHECK: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe]
> +
> +s_bitset0_b32 s1, s2
> +// CHECK: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x1b,0x81,0xbe]
> +
> +s_bitset0_b64 s[2:3], s[4:5]
> +// CHECK: s_bitset0_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1c,0x82,0xbe]
> +
> +s_bitset1_b32 s1, s2
> +// CHECK: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1d,0x81,0xbe]
> +
> +s_bitset1_b64 s[2:3], s[4:5]
> +// CHECK: s_bitset1_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1e,0x82,0xbe]
> +
> +s_getpc_b64 s[2:3]
> +// CHECK: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1f,0x82,0xbe]
> +
> +s_setpc_b64 s[2:3], s[4:5]
> +// CHECK: s_setpc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x20,0x82,0xbe]
> +
> +s_swappc_b64 s[2:3], s[4:5]
> +// CHECK: s_swappc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x21,0x82,0xbe]
> +
> +s_rfe_b64 s[2:3], s[4:5]
> +// CHECK: s_rfe_b64 s[2:3], s[4:5] ; encoding: [0x04,0x22,0x82,0xbe]
> +
> +s_and_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe]
> +
> +s_or_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe]
> +
> +s_xor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe]
> +
> +s_andn2_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe]
> +
> +s_orn2_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x28,0x82,0xbe]
> +
> +s_nand_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe]
> +
> +s_nor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2a,0x82,0xbe]
> +
> +s_xnor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe]
> +
> +s_quadmask_b32 s1, s2
> +// CHECK: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe]
> +
> +s_quadmask_b64 s[2:3], s[4:5]
> +// CHECK: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe]
> +
> +s_movrels_b32 s1, s2
> +// CHECK: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2e,0x81,0xbe]
> +
> +s_movrels_b64 s[2:3], s[4:5]
> +// CHECK: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2f,0x82,0xbe]
> +
> +s_movreld_b32 s1, s2
> +// CHECK: s_movreld_b32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe]
> +
> +s_movreld_b64 s[2:3], s[4:5]
> +// CHECK: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x31,0x82,0xbe]
> +
> +s_cbranch_join s[4:5]
> +// CHECK: s_cbranch_join s[4:5] ; encoding: [0x04,0x32,0x80,0xbe]
> +
> +s_abs_i32 s1, s2
> +// CHECK: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe]
> +
> +s_mov_fed_b32 s1, s2
> +// CHECK: s_mov_fed_b32 s1, s2 ; encoding: [0x02,0x35,0x81,0xbe]
> diff --git a/test/MC/R600/sop2.s b/test/MC/R600/sop2.s
> new file mode 100644
> index 0000000..9a7a1c0
> --- /dev/null
> +++ b/test/MC/R600/sop2.s
> @@ -0,0 +1,131 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +// CHECK: s_add_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x80]
> +s_add_u32 s1, s2, s3
> +
> +// CHECK: s_sub_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x80]
> +s_sub_u32 s1, s2, s3
> +
> +// CHECK: s_add_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x81]
> +s_add_i32 s1, s2, s3
> +
> +// CHECK: s_sub_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x81]
> +s_sub_i32 s1, s2, s3
> +
> +// CHECK: s_addc_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x82]
> +s_addc_u32 s1, s2, s3
> +
> +// CHECK: s_subb_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x82]
> +s_subb_u32 s1, s2, s3
> +
> +// CHECK: s_min_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x83]
> +s_min_i32 s1, s2, s3
> +
> +// CHECK: s_min_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x83]
> +s_min_u32 s1, s2, s3
> +
> +// CHECK: s_max_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x84]
> +s_max_i32 s1, s2, s3
> +
> +// CHECK: s_max_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x84]
> +s_max_u32 s1, s2, s3
> +
> +// CHECK: s_cselect_b32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x85]
> +s_cselect_b32 s1, s2, s3
> +
> +// CHECK: s_cselect_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x85]
> +s_cselect_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_and_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x87]
> +s_and_b32 s2, s4, s6
> +
> +// CHECK: s_and_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x87]
> +s_and_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_or_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x88]
> +s_or_b32 s2, s4, s6
> +
> +// CHECK: s_or_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x88]
> +s_or_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_xor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x89]
> +s_xor_b32 s2, s4, s6
> +
> +// CHECK: s_xor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x89]
> +s_xor_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_andn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8a]
> +s_andn2_b32 s2, s4, s6
> +
> +// CHECK: s_andn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8a]
> +s_andn2_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_orn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8b]
> +s_orn2_b32 s2, s4, s6
> +
> +// CHECK: s_orn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8b]
> +s_orn2_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_nand_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8c]
> +s_nand_b32 s2, s4, s6
> +
> +// CHECK: s_nand_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8c]
> +s_nand_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_nor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8d]
> +s_nor_b32 s2, s4, s6
> +
> +// CHECK: s_nor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8d]
> +s_nor_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_xnor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8e]
> +s_xnor_b32 s2, s4, s6
> +
> +// CHECK: s_xnor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8e]
> +s_xnor_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_lshl_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8f]
> +s_lshl_b32 s2, s4, s6
> +
> +// CHECK: s_lshl_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8f]
> +s_lshl_b64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_lshr_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x90]
> +s_lshr_b32 s2, s4, s6
> +
> +// CHECK: s_lshr_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x90]
> +s_lshr_b64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_ashr_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x91]
> +s_ashr_i32 s2, s4, s6
> +
> +// CHECK: s_ashr_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x91]
> +s_ashr_i64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_bfm_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x92]
> +s_bfm_b32 s2, s4, s6
> +
> +// CHECK: s_bfm_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x92]
> +s_bfm_b64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_mul_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x93]
> +s_mul_i32 s2, s4, s6
> +
> +// CHECK: s_bfe_u32 s2, s4, s6 ; encoding: [0x04,0x06,0x82,0x93]
> +s_bfe_u32 s2, s4, s6
> +
> +// CHECK: s_bfe_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x94]
> +s_bfe_i32 s2, s4, s6
> +
> +// CHECK: s_bfe_u64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x94]
> +s_bfe_u64 s[2:3], s[4:5], s[6:7]
> +
> +// CHECK: s_bfe_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x02,0x95]
> +s_bfe_i64 s[2:3], s[4:5], s6
> +
> +// CHECK: s_cbranch_g_fork s[4:5], s[6:7] ; encoding: [0x04,0x06,0x80,0x95]
> +s_cbranch_g_fork s[4:5], s[6:7]
> +
> +// CHECK: s_absdiff_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x96]
> +s_absdiff_i32 s2, s4, s6
> diff --git a/test/MC/R600/sopc.s b/test/MC/R600/sopc.s
> new file mode 100644
> index 0000000..0899c1a
> --- /dev/null
> +++ b/test/MC/R600/sopc.s
> @@ -0,0 +1,9 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +s_cmp_eq_i32 s1, s2
> +// CHECK: s_cmp_eq_i32 s1, s2 ; encoding: [0x01,0x02,0x00,0xbf]
> diff --git a/test/MC/R600/sopk.s b/test/MC/R600/sopk.s
> new file mode 100644
> index 0000000..6c27aac
> --- /dev/null
> +++ b/test/MC/R600/sopk.s
> @@ -0,0 +1,66 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +s_movk_i32 s2, 0x6
> +// CHECK: s_movk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb0]
> +
> +s_cmovk_i32 s2, 0x6
> +// CHECK: s_cmovk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb1]
> +
> +s_cmpk_eq_i32 s2, 0x6
> +// CHECK: s_cmpk_eq_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb1]
> +
> +s_cmpk_lg_i32 s2, 0x6
> +// CHECK: s_cmpk_lg_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb2]
> +
> +s_cmpk_gt_i32 s2, 0x6
> +// CHECK: s_cmpk_gt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb2]
> +
> +s_cmpk_ge_i32 s2, 0x6
> +// CHECK: s_cmpk_ge_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb3]
> +
> +s_cmpk_lt_i32 s2, 0x6
> +// CHECK: s_cmpk_lt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb3]
> +
> +s_cmpk_le_i32 s2, 0x6
> +// CHECK: s_cmpk_le_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb4]
> +
> +s_cmpk_eq_u32 s2, 0x6
> +// CHECK: s_cmpk_eq_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb4]
> +
> +s_cmpk_lg_u32 s2, 0x6
> +// CHECK: s_cmpk_lg_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb5]
> +
> +s_cmpk_gt_u32 s2, 0x6
> +// CHECK: s_cmpk_gt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb5]
> +
> +s_cmpk_ge_u32 s2, 0x6
> +// CHECK: s_cmpk_ge_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb6]
> +
> +s_cmpk_lt_u32 s2, 0x6
> +// CHECK: s_cmpk_lt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb6]
> +
> +s_cmpk_le_u32 s2, 0x6
> +// CHECK: s_cmpk_le_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb7]
> +
> +s_addk_i32 s2, 0x6
> +// CHECK: s_addk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb7]
> +
> +s_mulk_i32 s2, 0x6
> +// CHECK: s_mulk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb8]
> +
> +s_cbranch_i_fork s[2:3], 0x6
> +// CHECK: s_cbranch_i_fork s[2:3], 0x6 ; encoding: [0x06,0x00,0x82,0xb8]
> +
> +s_getreg_b32 s2, 0x6
> +// CHECK: s_getreg_b32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb9]
> +
> +s_setreg_b32 s2, 0x6
> +// CHECK: s_setreg_b32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb9]
> +
> +s_setreg_imm32_b32 0xff, 0x6
> +// CHECK: s_setreg_imm32_b32 0xff, 0x6 ; encoding: [0x06,0x00,0x80,0xba,0xff,0x00,0x00,0x00]
> diff --git a/test/MC/R600/sopp.s b/test/MC/R600/sopp.s
> index 0f186b1..b072c16 100644
> --- a/test/MC/R600/sopp.s
> +++ b/test/MC/R600/sopp.s
> @@ -1,4 +1,16 @@
> -// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Edge Cases
> +//===----------------------------------------------------------------------===//
> +
> +s_nop 0 // CHECK: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
> +s_nop 0xffff // CHECK: s_nop 0xffff ; encoding: [0xff,0xff,0x80,0xbf]
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
>
> s_nop 1 // CHECK: s_nop 1 ; encoding: [0x01,0x00,0x80,0xbf]
> s_endpgm // CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
> diff --git a/test/MC/R600/vop1.s b/test/MC/R600/vop1.s
> new file mode 100644
> index 0000000..9c9a6b2
> --- /dev/null
> +++ b/test/MC/R600/vop1.s
> @@ -0,0 +1,182 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +// CHECK: v_nop ; encoding: [0x00,0x00,0x00,0x7e]
> +v_nop
> +
> +// CHECK: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e]
> +v_mov_b32 v1, v2
> +
> +// CHECK: v_readfirstlane_b32 s1, v2 ; encoding: [0x02,0x05,0x02,0x7e]
> +v_readfirstlane_b32 s1, v2
> +
> +// CHECK: v_cvt_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x07,0x02,0x7e]
> +v_cvt_i32_f64 v1, v[2:3]
> +
> +// CHECK: v_cvt_f64_i32_e32 v[1:2], v2 ; encoding: [0x02,0x09,0x02,0x7e]
> +v_cvt_f64_i32 v[1:2], v2
> +
> +// CHECK: v_cvt_f32_i32_e32 v1, v2 ; encoding: [0x02,0x0b,0x02,0x7e]
> +v_cvt_f32_i32 v1, v2
> +
> +// CHECK: v_cvt_f32_u32_e32 v1, v2 ; encoding: [0x02,0x0d,0x02,0x7e]
> +v_cvt_f32_u32 v1, v2
> +
> +// CHECK: v_cvt_u32_f32_e32 v1, v2 ; encoding: [0x02,0x0f,0x02,0x7e]
> +v_cvt_u32_f32 v1, v2
> +
> +// CHECK: v_cvt_i32_f32_e32 v1, v2 ; encoding: [0x02,0x11,0x02,0x7e]
> +v_cvt_i32_f32 v1, v2
> +
> +// CHECK: v_mov_fed_b32_e32 v1, v2 ; encoding: [0x02,0x13,0x02,0x7e]
> +v_mov_fed_b32 v1, v2
> +
> +// CHECK: v_cvt_f16_f32_e32 v1, v2 ; encoding: [0x02,0x15,0x02,0x7e]
> +v_cvt_f16_f32 v1, v2
> +
> +// CHECK: v_cvt_f32_f16_e32 v1, v2 ; encoding: [0x02,0x17,0x02,0x7e]
> +v_cvt_f32_f16 v1, v2
> +
> +// CHECK: v_cvt_rpi_i32_f32_e32 v1, v2 ; encoding: [0x02,0x19,0x02,0x7e]
> +v_cvt_rpi_i32_f32 v1, v2
> +
> +// CHECK: v_cvt_flr_i32_f32_e32 v1, v2 ; encoding: [0x02,0x1b,0x02,0x7e]
> +v_cvt_flr_i32_f32 v1, v2
> +
> +// CHECK: v_cvt_off_f32_i4_e32 v1, v2 ; encoding: [0x02,0x1d,0x02,0x7e]
> +v_cvt_off_f32_i4_e32 v1, v2
> +
> +// CHECK: v_cvt_f32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x1f,0x02,0x7e]
> +v_cvt_f32_f64 v1, v[2:3]
> +
> +// CHECK: v_cvt_f64_f32_e32 v[1:2], v2 ; encoding: [0x02,0x21,0x02,0x7e]
> +v_cvt_f64_f32 v[1:2], v2
> +
> +// CHECK: v_cvt_f32_ubyte0_e32 v1, v2 ; encoding: [0x02,0x23,0x02,0x7e]
> +v_cvt_f32_ubyte0 v1, v2
> +
> +// CHECK: v_cvt_f32_ubyte1_e32 v1, v2 ; encoding: [0x02,0x25,0x02,0x7e]
> +v_cvt_f32_ubyte1_e32 v1, v2
> +
> +// CHECK: v_cvt_f32_ubyte2_e32 v1, v2 ; encoding: [0x02,0x27,0x02,0x7e]
> +v_cvt_f32_ubyte2 v1, v2
> +
> +// CHECK: v_cvt_f32_ubyte3_e32 v1, v2 ; encoding: [0x02,0x29,0x02,0x7e]
> +v_cvt_f32_ubyte3 v1, v2
> +
> +// CHECK: v_cvt_u32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x2b,0x02,0x7e]
> +v_cvt_u32_f64 v1, v[2:3]
> +
> +// CHECK: v_cvt_f64_u32_e32 v[1:2], v2 ; encoding: [0x02,0x2d,0x02,0x7e]
> +v_cvt_f64_u32 v[1:2], v2
> +
> +// CHECK: v_fract_f32_e32 v1, v2 ; encoding: [0x02,0x41,0x02,0x7e]
> +v_fract_f32 v1, v2
> +
> +// CHECK: v_trunc_f32_e32 v1, v2 ; encoding: [0x02,0x43,0x02,0x7e]
> +v_trunc_f32 v1, v2
> +
> +// CHECK: v_ceil_f32_e32 v1, v2 ; encoding: [0x02,0x45,0x02,0x7e]
> +v_ceil_f32 v1, v2
> +
> +// CHECK: v_rndne_f32_e32 v1, v2 ; encoding: [0x02,0x47,0x02,0x7e]
> +v_rndne_f32 v1, v2
> +
> +// CHECK: v_floor_f32_e32 v1, v2 ; encoding: [0x02,0x49,0x02,0x7e]
> +v_floor_f32_e32 v1, v2
> +
> +// CHECK: v_exp_f32_e32 v1, v2 ; encoding: [0x02,0x4b,0x02,0x7e]
> +v_exp_f32 v1, v2
> +
> +// CHECK: v_log_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x4d,0x02,0x7e]
> +v_log_clamp_f32 v1, v2
> +
> +// CHECK: v_log_f32_e32 v1, v2 ; encoding: [0x02,0x4f,0x02,0x7e]
> +v_log_f32 v1, v2
> +
> +// CHECK: v_rcp_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x51,0x02,0x7e]
> +v_rcp_clamp_f32 v1, v2
> +
> +// CHECK: v_rcp_legacy_f32_e32 v1, v2 ; encoding: [0x02,0x53,0x02,0x7e]
> +v_rcp_legacy_f32 v1, v2
> +
> +// CHECK: v_rcp_f32_e32 v1, v2 ; encoding: [0x02,0x55,0x02,0x7e]
> +v_rcp_f32 v1, v2
> +
> +// CHECK: v_rcp_iflag_f32_e32 v1, v2 ; encoding: [0x02,0x57,0x02,0x7e]
> +v_rcp_iflag_f32 v1, v2
> +
> +// CHECK: v_rsq_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x59,0x02,0x7e]
> +v_rsq_clamp_f32 v1, v2
> +
> +// CHECK: v_rsq_legacy_f32_e32 v1, v2 ; encoding: [0x02,0x5b,0x02,0x7e]
> +v_rsq_legacy_f32 v1, v2
> +
> +// CHECK: v_rsq_f32_e32 v1, v2 ; encoding: [0x02,0x5d,0x02,0x7e]
> +v_rsq_f32_e32 v1, v2
> +
> +// CHECK: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x5f,0x02,0x7e]
> +v_rcp_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_rcp_clamp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x61,0x02,0x7e]
> +v_rcp_clamp_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_rsq_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x63,0x02,0x7e]
> +v_rsq_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_rsq_clamp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x65,0x02,0x7e]
> +v_rsq_clamp_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_sqrt_f32_e32 v1, v2 ; encoding: [0x02,0x67,0x02,0x7e]
> +v_sqrt_f32 v1, v2
> +
> +// CHECK: v_sqrt_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x69,0x02,0x7e]
> +v_sqrt_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_sin_f32_e32 v1, v2 ; encoding: [0x02,0x6b,0x02,0x7e]
> +v_sin_f32 v1, v2
> +
> +// CHECK: v_cos_f32_e32 v1, v2 ; encoding: [0x02,0x6d,0x02,0x7e]
> +v_cos_f32 v1, v2
> +
> +// CHECK: v_not_b32_e32 v1, v2 ; encoding: [0x02,0x6f,0x02,0x7e]
> +v_not_b32 v1, v2
> +
> +// CHECK: v_bfrev_b32_e32 v1, v2 ; encoding: [0x02,0x71,0x02,0x7e]
> +v_bfrev_b32 v1, v2
> +
> +// CHECK: v_ffbh_u32_e32 v1, v2 ; encoding: [0x02,0x73,0x02,0x7e]
> +v_ffbh_u32 v1, v2
> +
> +// CHECK: v_ffbl_b32_e32 v1, v2 ; encoding: [0x02,0x75,0x02,0x7e]
> +v_ffbl_b32 v1, v2
> +
> +// CHECK: v_ffbh_i32_e32 v1, v2 ; encoding: [0x02,0x77,0x02,0x7e]
> +v_ffbh_i32_e32 v1, v2
> +
> +// CHECK: v_frexp_exp_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x79,0x02,0x7e]
> +v_frexp_exp_i32_f64 v1, v[2:3]
> +
> +// CHECK: v_frexp_mant_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7b,0x02,0x7e]
> +v_frexp_mant_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_fract_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7d,0x02,0x7e]
> +v_fract_f64 v[1:2], v[2:3]
> +
> +// CHECK: v_frexp_exp_i32_f32_e32 v1, v2 ; encoding: [0x02,0x7f,0x02,0x7e]
> +v_frexp_exp_i32_f32 v1, v2
> +
> +// CHECK: v_frexp_mant_f32_e32 v1, v2 ; encoding: [0x02,0x81,0x02,0x7e]
> +v_frexp_mant_f32 v1, v2
> +
> +// CHECK: v_clrexcp ; encoding: [0x00,0x82,0x00,0x7e]
> +v_clrexcp
> +
> +// CHECK: v_movreld_b32_e32 v1, v2 ; encoding: [0x02,0x85,0x02,0x7e]
> +v_movreld_b32 v1, v2
> +
> +// CHECK: v_movrels_b32_e32 v1, v2 ; encoding: [0x02,0x87,0x02,0x7e]
> +v_movrels_b32 v1, v2
> +
> +// CHECK: v_movrelsd_b32_e32 v1, v2 ; encoding: [0x02,0x89,0x02,0x7e]
> +v_movrelsd_b32 v1, v2
> diff --git a/test/MC/R600/vop2-err.s b/test/MC/R600/vop2-err.s
> new file mode 100644
> index 0000000..a113100
> --- /dev/null
> +++ b/test/MC/R600/vop2-err.s
> @@ -0,0 +1,35 @@
> +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
> +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Generic checks
> +//===----------------------------------------------------------------------===//
> +
> +v_mul_i32_i24 v1, v2, 100
> +// CHECK: error: invalid operand for instruction
> +
> +//===----------------------------------------------------------------------===//
> +// _e32 checks
> +//===----------------------------------------------------------------------===//
> +
> +// Immediate src1
> +v_mul_i32_i24_e32 v1, v2, 100
> +// CHECK: error: invalid operand for instruction
> +
> +// sgpr src1
> +v_mul_i32_i24_e32 v1, v2, s3
> +// CHECK: error: invalid operand for instruction
> +
> +//===----------------------------------------------------------------------===//
> +// _e64 checks
> +//===----------------------------------------------------------------------===//
> +
> +// Immediate src0
> +v_mul_i32_i24_e64 v1, 100, v3
> +// CHECK: error: invalid operand for instruction
> +
> +// Immediate src1
> +v_mul_i32_i24_e64 v1, v2, 100
> +// CHECK: error: invalid operand for instruction
> +
> +// TODO: Constant bus restrictions
> diff --git a/test/MC/R600/vop2.s b/test/MC/R600/vop2.s
> new file mode 100644
> index 0000000..6780088
> --- /dev/null
> +++ b/test/MC/R600/vop2.s
> @@ -0,0 +1,242 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Generic Checks for floating-point instructions (These have modifiers).
> +//===----------------------------------------------------------------------===//
> +
> +// TODO: 64-bit encoding of instructions with modifiers
> +
> +// _e32 suffix
> +// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
> +v_add_f32_e32 v1, v2, v3
> +
> +// src0 inline immediate
> +// CHECK: v_add_f32_e32 v1, 1.0, v3 ; encoding: [0xf2,0x06,0x02,0x06]
> +v_add_f32 v1, 1.0, v3
> +
> +// src0 negative inline immediate
> +// CHECK: v_add_f32_e32 v1, -1.0, v3 ; encoding: [0xf3,0x06,0x02,0x06]
> +v_add_f32 v1, -1.0, v3
> +
> +// src0 literal
> +// CHECK: v_add_f32_e32 v1, 0x42c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0x42]
> +v_add_f32 v1, 100.0, v3
> +
> +// src0 negative literal
> +// CHECK: v_add_f32_e32 v1, 0xc2c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0xc2]
> +v_add_f32 v1, -100.0, v3
> +
> +//===----------------------------------------------------------------------===//
> +// Generic Checks for integer instructions (These don't have modifiers).
> +//===----------------------------------------------------------------------===//
> +
> +// _e32 suffix
> +// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
> +v_mul_i32_i24_e32 v1, v2, v3
> +
> +// _e64 suffix
> +// CHECK: v_mul_i32_i24_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x02,0x00]
> +v_mul_i32_i24_e64 v1, v2, v3
> +
> +// src0 inline
> +// CHECK: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12]
> +v_mul_i32_i24 v1, 3, v3
> +
> +// src0 negative inline
> +// CHECK: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12]
> +v_mul_i32_i24 v1, -3, v3
> +
> +// src1 inline
> +// CHECK: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00]
> +v_mul_i32_i24 v1, v2, 3
> +
> +// src1 negative inline
> +// CHECK: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00]
> +v_mul_i32_i24 v1, v2, -3
> +
> +// src0 literal
> +// CHECK: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x12,0x64,0x00,0x00,0x00]
> +v_mul_i32_i24 v1, 100, v3
> +
> +// src1 negative literal
> +// CHECK: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x12,0x9c,0xff,0xff,0xff]
> +v_mul_i32_i24 v1, -100, v3
> +
> +//===----------------------------------------------------------------------===//
> +// Checks for legal operands
> +//===----------------------------------------------------------------------===//
> +
> +// src0 sgpr
> +// CHECK: v_mul_i32_i24_e32 v1, s2, v3 ; encoding: [0x02,0x06,0x02,0x12]
> +v_mul_i32_i24 v1, s2, v3
> +
> +// src1 sgpr
> +// CHECK: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x00,0x00]
> +v_mul_i32_i24 v1, v2, s3
> +
> +// src0, src1 same sgpr
> +// CHECK: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x04,0x00,0x00]
> +v_mul_i32_i24 v1, s2, s2
> +
> +// src0 sgpr, src1 inline
> +// CHECK: v_mul_i32_i24_e64 v1, s2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x06,0x01,0x00]
> +v_mul_i32_i24 v1, s2, 3
> +
> +// src0 inline src1 sgpr
> +// CHECK: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00]
> +v_mul_i32_i24 v1, 3, s3
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +// CHECK: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00]
> +v_cndmask_b32 v1, v2, v3
> +
> +// CHECK: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02]
> +v_readlane_b32 s1, v2, s3
> +
> +// CHECK: v_writelane_b32 v1, s2, s3 ; encoding: [0x02,0x06,0x02,0x04]
> +v_writelane_b32 v1, s2, s3
> +
> +// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
> +v_add_f32 v1, v2, v3
> +
> +// CHECK: v_sub_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x08]
> +v_sub_f32 v1, v2, v3
> +
> +// CHECK: v_subrev_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0a]
> +v_subrev_f32 v1, v2, v3
> +
> +// CHECK: v_mac_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c]
> +v_mac_legacy_f32 v1, v2, v3
> +
> +// CHECK: v_mul_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e]
> +v_mul_legacy_f32_e32 v1, v2, v3
> +
> +// CHECK: v_mul_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10]
> +v_mul_f32 v1, v2, v3
> +
> +// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
> +v_mul_i32_i24 v1, v2, v3
> +
> +// CHECK: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14]
> +v_mul_hi_i32_i24 v1, v2, v3
> +
> +// CHECK: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16]
> +v_mul_u32_u24 v1, v2, v3
> +
> +// CHECK: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18]
> +v_mul_hi_u32_u24 v1, v2, v3
> +
> +// CHECK: v_min_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a]
> +v_min_legacy_f32_e32 v1, v2, v3
> +
> +// CHECK: v_max_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c]
> +v_max_legacy_f32 v1, v2, v3
> +
> +// CHECK: v_min_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e]
> +v_min_f32_e32 v1, v2, v3
> +
> +// CHECK: v_max_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20]
> +v_max_f32 v1, v2, v3
> +
> +// CHECK: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22]
> +v_min_i32 v1, v2, v3
> +
> +// CHECK: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24]
> +v_max_i32 v1, v2, v3
> +
> +// CHECK: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26]
> +v_min_u32 v1, v2, v3
> +
> +// CHECK: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28]
> +v_max_u32 v1, v2, v3
> +
> +// CHECK: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a]
> +v_lshr_b32 v1, v2, v3
> +
> +// CHECK: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c]
> +v_lshrrev_b32 v1, v2, v3
> +
> +// CHECK: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e]
> +v_ashr_i32 v1, v2, v3
> +
> +// CHECK: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30]
> +v_ashrrev_i32 v1, v2, v3
> +
> +// CHECK: v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
> +v_lshl_b32_e32 v1, v2, v3
> +
> +// CHECK: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
> +v_lshlrev_b32 v1, v2, v3
> +
> +// CHECK: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
> +v_and_b32 v1, v2, v3
> +
> +// CHECK: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
> +v_or_b32 v1, v2, v3
> +
> +// CHECK: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
> +v_xor_b32 v1, v2, v3
> +
> +// CHECK: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
> +v_bfm_b32 v1, v2, v3
> +
> +// CHECK: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e]
> +v_mac_f32 v1, v2, v3
> +
> +// CHECK: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42]
> +v_madmk_f32 v1, v2, v3, 64.0
> +
> +// CHECK: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42]
> +v_madak_f32 v1, v2, v3, 64.0
> +
> +// CHECK: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44]
> +v_bcnt_u32_b32 v1, v2, v3
> +
> +// CHECK: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46]
> +v_mbcnt_lo_u32_b32 v1, v2, v3
> +
> +// CHECK: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48]
> +v_mbcnt_hi_u32_b32_e32 v1, v2, v3
> +
> +// CHECK: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
> +v_add_i32 v1, v2, v3
> +
> +// CHECK: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
> +v_sub_i32_e32 v1, v2, v3
> +
> +// CHECK: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
> +v_subrev_i32 v1, v2, v3
> +
> +// CHECK: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
> +v_addc_u32 v1, v2, v3
> +
> +// CHECK: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
> +v_subb_u32 v1, v2, v3
> +
> +// CHECK: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
> +v_subbrev_u32 v1, v2, v3
> +
> +// CHECK: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
> +v_ldexp_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58]
> +v_cvt_pkaccum_u8_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a]
> +v_cvt_pknorm_i16_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c]
> +v_cvt_pknorm_u16_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e]
> +v_cvt_pkrtz_f16_f32 v1, v2, v3
> +
> +// CHECK: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60]
> +v_cvt_pk_u16_u32_e32 v1, v2, v3
> +
> +// CHECK: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62]
> +v_cvt_pk_i16_i32 v1, v2, v3
> diff --git a/test/MC/R600/vop3.s b/test/MC/R600/vop3.s
> new file mode 100644
> index 0000000..7d1ba0b
> --- /dev/null
> +++ b/test/MC/R600/vop3.s
> @@ -0,0 +1,138 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// VOPC Instructions
> +//===----------------------------------------------------------------------===//
> +
> +//
> +// Modifier tests:
> +//
> +
> +v_cmp_lt_f32 s[2:3] -v4, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x20]
> +
> +v_cmp_lt_f32 s[2:3] v4, -v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
> +
> +v_cmp_lt_f32 s[2:3] -v4, -v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x60]
> +
> +v_cmp_lt_f32 s[2:3] |v4|, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3] v4, |v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3] |v4|, |v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3] -|v4|, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x20]
> +
> +v_cmp_lt_f32 s[2:3] v4, -|v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x40]
> +
> +v_cmp_lt_f32 s[2:3] -|v4|, -|v6|
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x60]
> +
> +//
> +// Instruction tests:
> +//
> +
> +v_cmp_f_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x00,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lt_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_eq_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x04,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_le_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x06,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_gt_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x08,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_lg_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0a,0xd0,0x04,0x0d,0x02,0x00]
> +
> +v_cmp_ge_f32 s[2:3], v4, v6
> +// CHECK: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0c,0xd0,0x04,0x0d,0x02,0x00]
> +
> +// TODO: Finish VOPC
> +
> +//===----------------------------------------------------------------------===//
> +// VOP1 Instructions
> +//===----------------------------------------------------------------------===//
> +
> +//
> +// Modifier tests:
> +//
> +
> +v_fract_f32 v1, -v2
> +// CHECK: v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x20]
> +
> +v_fract_f32 v1, |v2|
> +// CHECK: v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x00]
> +
> +v_fract_f32 v1, -|v2|
> +// CHECK: v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x20]
> +
> +v_fract_f32 v1, v2 clamp
> +// CHECK: v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x00]
> +
> +v_fract_f32 v1, v2 mul:2
> +// CHECK: v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x08]
> +
> +v_fract_f32 v1, v2, div:2 clamp
> +// CHECK: v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x18]
> +
> +// TODO: Finish VOP1
> +
> +///===---------------------------------------------------------------------===//
> +// VOP2 Instructions
> +///===---------------------------------------------------------------------===//
> +
> +// TODO: Modifier tests
> +
> +v_cndmask_b32 v1, v3, v5, s[4:5]
> +// CHECK: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
> +
> +//TODO: readlane, writelane
> +
> +v_add_f32 v1, v3, s5
> +// CHECK: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_sub_f32 v1, v3, s5
> +// CHECK: v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x08,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_subrev_f32 v1, v3, s5
> +// CHECK: v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0a,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mac_legacy_f32 v1, v3, s5
> +// CHECK: v_mac_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mul_legacy_f32 v1, v3, s5
> +// CHECK: v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0e,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mul_f32 v1, v3, s5
> +// CHECK: v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x10,0xd2,0x03,0x0b,0x00,0x00]
> +
> +v_mul_i32_i24 v1, v3, s5
> +// CHECK: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00]
> +
> +///===---------------------------------------------------------------------===//
> +// VOP3 Instructions
> +///===---------------------------------------------------------------------===//
> +
> +// TODO: Modifier tests
> +
> +v_mad_legacy_f32 v2, v4, v6, v8
> +// CHECK: v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0x80,0xd2,0x04,0x0d,0x22,0x04]
> +
> +
> +
> +
> +
> diff --git a/test/MC/R600/vopc.s b/test/MC/R600/vopc.s
> new file mode 100644
> index 0000000..f44919a
> --- /dev/null
> +++ b/test/MC/R600/vopc.s
> @@ -0,0 +1,40 @@
> +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
> +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +//===----------------------------------------------------------------------===//
> +// Generic Checks
> +//===----------------------------------------------------------------------===//
> +
> +// src0 sgpr
> +v_cmp_lt_f32 vcc, s2, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, s2, v4 ; encoding: [0x02,0x08,0x02,0x7c]
> +
> +// src0 inline immediate
> +v_cmp_lt_f32 vcc, 0, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, 0, v4 ; encoding: [0x80,0x08,0x02,0x7c]
> +
> +// src0 literal
> +v_cmp_lt_f32 vcc, 10.0, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, 0x41200000, v4 ; encoding: [0xff,0x08,0x02,0x7c,0x00,0x00,0x20,0x41]
> +
> +// src0, src1 max vgpr
> +v_cmp_lt_f32 vcc, v255, v255
> +// CHECK: v_cmp_lt_f32_e32 vcc, v255, v255 ; encoding: [0xff,0xff,0x03,0x7c]
> +
> +// force 32-bit encoding
> +v_cmp_lt_f32_e32 vcc, v2, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
> +
> +
> +//===----------------------------------------------------------------------===//
> +// Instructions
> +//===----------------------------------------------------------------------===//
> +
> +v_cmp_f_f32 vcc, v2, v4
> +// CHECK: v_cmp_f_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x00,0x7c]
> +
> +v_cmp_lt_f32 vcc, v2, v4
> +// CHECK: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
> +
> +// TODO: Add tests for the rest of the instructions.
> +
> --
> 1.8.1.5
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list