PATCHES: R600/SI: Enable SOP1 and s_load_* instructions in the assembler
Matt Arsenault
Matthew.Arsenault at amd.com
Thu Feb 12 10:08:39 PST 2015
On 02/11/2015 11:41 AM, Tom Stellard wrote:
> Hi,
>
> These patches add support for more scalar instructions to the assembler.
>
> -Tom
>
> 0001-R600-SI-Remove-some-unused-TableGen-classes.patch
>
>
> From 2c64f439bcddcd154be74cae2faaccb8920a7b0a Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Thu, 11 Dec 2014 16:18:02 -0500
> Subject: [PATCH 1/5] R600/SI: Remove some unused TableGen classes
>
> ---
> lib/Target/R600/SIInstrInfo.td | 19 -------------------
> 1 file changed, 19 deletions(-)
>
LGTM
>
> 0002-R600-SI-Lowercase-register-names.patch
>
>
> From a24954e7cffcac84c0fca15dea128a5f8d083dc5 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 2 Dec 2014 23:33:30 -0500
> Subject: [PATCH 2/5] R600/SI: Lowercase register names
>
> ---
> lib/Target/R600/SIRegisterInfo.td | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
LGTM
>
> 0003-R600-SI-Refactor-SOP1-classes.patch
>
>
> From c0bbcac32c9c59b29b1eef3397568baa5ad2e11a Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 9 Dec 2014 16:47:37 -0500
> Subject: [PATCH 3/5] R600/SI: Refactor SOP1 classes
>
> ---
> lib/Target/R600/SIInstrInfo.td | 45 ++++++++++++++++++------------------------
> 1 file changed, 19 insertions(+), 26 deletions(-)
>
LGTM
> 0004-R600-SI-Add-assembler-support-for-s_load_dword-instr.patch
>
>
> From f87d5c404203037687629c05fddb8a5adc055784 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Fri, 14 Nov 2014 06:22:05 -0500
> Subject: [PATCH 4/5] R600/SI: Add assembler support for s_load_dword*
> instructions
>
> ---
> docs/R600Usage.rst | 4 +
> lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp | 161 +++++++++++++++++++++++---
> lib/Target/R600/SIInstrInfo.td | 4 +-
> test/MC/R600/smrd.s | 31 +++++
> 4 files changed, 185 insertions(+), 15 deletions(-)
> create mode 100644 test/MC/R600/smrd.s
>
> diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst
> index 48a30c8..2282d54 100644
> --- a/docs/R600Usage.rst
> +++ b/docs/R600Usage.rst
> @@ -15,6 +15,10 @@ Assembler
> The assembler is currently a work in progress and not yet complete. Below
> are the currently supported features.
>
> +SMRD Instructions
> +-----------------
> +The assembler currently supports only the s_load_dword* SMRD instructions.
> +
> SOPP Instructions
> -----------------
>
> diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> index 3b4ba1a..33cb2bb 100644
> --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> @@ -27,6 +27,7 @@
> #include "llvm/Support/SourceMgr.h"
> #include "llvm/Support/TargetRegistry.h"
> #include "llvm/Support/raw_ostream.h"
> +#include "llvm/Support/Debug.h"
>
> using namespace llvm;
>
> @@ -69,7 +70,8 @@ public:
> class AMDGPUOperand : public MCParsedAsmOperand {
> enum KindTy {
> Token,
> - Immediate
> + Immediate,
> + Register
> } Kind;
>
> public:
> @@ -84,16 +86,21 @@ public:
> int64_t Val;
> };
>
> + struct RegOp {
> + unsigned RegNo;
> + };
> +
> union {
> TokOp Tok;
> ImmOp Imm;
> + RegOp Reg;
> };
>
> void addImmOperands(MCInst &Inst, unsigned N) const {
> Inst.addOperand(MCOperand::CreateImm(getImm()));
> }
> void addRegOperands(MCInst &Inst, unsigned N) const {
> - llvm_unreachable("addRegOperands");
> + Inst.addOperand(MCOperand::CreateReg(getReg()));
> }
> StringRef getToken() const {
> return StringRef(Tok.Data, Tok.Length);
> @@ -111,11 +118,11 @@ public:
> }
>
> bool isReg() const override {
> - return false;
> + return Kind == Register;
> }
>
> unsigned getReg() const override {
> - return 0;
> + return Reg.RegNo;
> }
>
> bool isMem() const override {
> @@ -145,13 +152,125 @@ public:
> return Res;
> }
>
> + static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
> + SMLoc E) {
> + auto Op = llvm::make_unique<AMDGPUOperand>(Register);
> + Op->Reg.RegNo = RegNo;
> + return Op;
> + }
> +
> bool isSWaitCnt() const;
> };
>
> }
>
> +static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
> + if (IsVgpr) {
> + switch (RegWidth) {
> + default: llvm_unreachable("Unknown register width");
> + case 1: return AMDGPU::VGPR_32RegClassID;
> + case 2: return AMDGPU::VReg_64RegClassID;
> + case 3: return AMDGPU::VReg_96RegClassID;
> + case 4: return AMDGPU::VReg_128RegClassID;
> + case 8: return AMDGPU::VReg_256RegClassID;
> + case 16: return AMDGPU::VReg_512RegClassID;
> + }
> + } else {
No need for the else here: every case in the switch above either returns or is unreachable.
> + switch (RegWidth) {
> + default: llvm_unreachable("Unknown register width");
> + case 1: return AMDGPU::SGPR_32RegClassID;
> + case 2: return AMDGPU::SGPR_64RegClassID;
> + case 4: return AMDGPU::SReg_128RegClassID;
> + case 8: return AMDGPU::SReg_256RegClassID;
> + case 16: return AMDGPU::SReg_512RegClassID;
> + }
> + }
> +}
> +
> bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
> - return true;
> + const AsmToken Tok = Parser.getTok();
> + StartLoc = Tok.getLoc();
> + EndLoc = Tok.getEndLoc();
> + const StringRef &RegName = Tok.getString();
> + RegNo = 0;
> +
> + // Handle special cases
> + if (RegName.equals("vcc_lo"))
> + RegNo = AMDGPU::VCC_LO;
> + else if (RegName.equals("vcc_hi"))
> + RegNo = AMDGPU::VCC_HI;
> + else if (RegName.equals("vcc"))
> + RegNo = AMDGPU::VCC;
> + else if (RegName.equals("exec_lo"))
> + RegNo = AMDGPU::EXEC_LO;
> + else if (RegName.equals("exec_hi"))
> + RegNo = AMDGPU::EXEC_HI;
> + else if (RegName.equals("exec"))
> + RegNo = AMDGPU::EXEC;
> + else if (RegName.equals("m0"))
> + RegNo = AMDGPU::M0;
> + else if (RegName.equals("flat_scr_lo"))
> + RegNo = AMDGPU::FLAT_SCR_LO;
> + else if (RegName.equals("flat_scr_hi"))
> + RegNo = AMDGPU::FLAT_SCR_HI;
> + else if (RegName.equals("flat_scr"))
> + RegNo = AMDGPU::FLAT_SCR;
> + else if (RegName.equals("scc"))
> + RegNo = AMDGPU::SCC;
I think this should be split into a separate function, use StringSwitch,
and be sorted so that the common registers come first. For example, vcc and
exec are frequently used, but I've almost never seen their _lo/_hi variants.
> +
> + if (RegNo)
> + return false;
> +
> + // Match vgprs and sgprs
> + if (RegName[0] != 's' && RegName[0] != 'v')
> + return true;
> +
> + bool IsVgpr = RegName[0] == 'v';
> + unsigned RegWidth;
> + unsigned RegIndexInClass;
> + if (RegName.size() > 1) {
> + // We have a 32-bit register
> + RegWidth = 1;
> + if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
> + return true;
> + Parser.Lex();
> + } else {
> + // We have a register greater than 32-bits.
> +
> + int64_t RegLo, RegHi;
> + Parser.Lex();
> + if (getLexer().isNot(AsmToken::LBrac))
> + return true;
> +
> + Parser.Lex();
> + if (getParser().parseAbsoluteExpression(RegLo))
> + return true;
> +
> + if (getLexer().isNot(AsmToken::Colon))
> + return true;
> +
> + Parser.Lex();
> + if (getParser().parseAbsoluteExpression(RegHi))
> + return true;
> +
> + if (getLexer().isNot(AsmToken::RBrac))
> + return true;
> +
> + Parser.Lex();
> + RegWidth = (RegHi - RegLo) + 1;
> + if (IsVgpr) {
> + // VGPR registers aren't aligned.
> + RegIndexInClass = RegLo;
> + } else {
> + // SGPR registers are aligned. Max alignment is 4 dwords.
> + RegIndexInClass = RegLo / std::min(RegWidth, 4u);
> + }
> + }
> +
> + const MCRegisterInfo *TRC = getContext().getRegisterInfo();
> + unsigned RC = getRegClass(IsVgpr, RegWidth);
> + RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
> + return false;
> }
>
>
> @@ -206,6 +325,14 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
> Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
> return MatchOperand_Success;
> }
> + case AsmToken::Identifier: {
> + SMLoc S, E;
> + unsigned RegNo;
> + if (ParseRegister(RegNo, S, E))
> + return MatchOperand_NoMatch;
> + Operands.push_back(AMDGPUOperand::CreateReg(RegNo, S, E));
> + return MatchOperand_Success;
> + }
> default:
> return MatchOperand_NoMatch;
> }
> @@ -217,17 +344,23 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
> // Add the instruction mnemonic
> Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
>
> - if (getLexer().is(AsmToken::EndOfStatement))
> - return false;
> + while (!getLexer().is(AsmToken::EndOfStatement)) {
>
> - AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
> - switch (Res) {
> - case MatchOperand_Success: return false;
> - case MatchOperand_ParseFail: return Error(NameLoc,
> - "Failed parsing operand");
> - case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand");
> + AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
> +
> + // Eat the comma if there is one.
> + if (getLexer().is(AsmToken::Comma))
> + Parser.Lex();
> +
> + switch (Res) {
> + case MatchOperand_Success: break;
> + case MatchOperand_ParseFail: return Error(getLexer().getLoc(),
> + "failed parsing operand.");
> + case MatchOperand_NoMatch: return Error(getLexer().getLoc(),
> + "not a valid operand.");
> + }
> }
> - return true;
> + return false;
> }
>
> //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 3dfde3a..ceca467 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -575,7 +575,9 @@ multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
>
> def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
>
> - def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
> + let isCodeGenOnly = 0 in {
> + def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
> + }
>
> def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>;
> }
> diff --git a/test/MC/R600/smrd.s b/test/MC/R600/smrd.s
> new file mode 100644
> index 0000000..d511c42
> --- /dev/null
> +++ b/test/MC/R600/smrd.s
> @@ -0,0 +1,31 @@
> +// RUN: llvm-mc -arch=r600 -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +s_load_dword s1, s[2:3], 1
> +// CHECK: s_load_dword s1, s[2:3], 0x1 ; encoding: [0x01,0x83,0x00,0xc0]
> +
> +s_load_dword s1, s[2:3], s4
> +// CHECK: s_load_dword s1, s[2:3], s4 ; encoding: [0x04,0x82,0x00,0xc0]
> +
> +s_load_dwordx2 s[2:3], s[2:3], 1
> +// CHECK: s_load_dwordx2 s[2:3], s[2:3], 0x1 ; encoding: [0x01,0x03,0x41,0xc0]
> +
> +s_load_dwordx2 s[2:3], s[2:3], s4
> +// CHECK: s_load_dwordx2 s[2:3], s[2:3], s4 ; encoding: [0x04,0x02,0x41,0xc0]
> +
> +s_load_dwordx4 s[4:7], s[2:3], 1
> +// CHECK: s_load_dwordx4 s[4:7], s[2:3], 0x1 ; encoding: [0x01,0x03,0x82,0xc0]
> +
> +s_load_dwordx4 s[4:7], s[2:3], s4
> +// CHECK: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x04,0x02,0x82,0xc0]
> +
> +s_load_dwordx8 s[8:15], s[2:3], 1
> +// CHECK: s_load_dwordx8 s[8:15], s[2:3], 0x1 ; encoding: [0x01,0x03,0xc4,0xc0]
> +
> +s_load_dwordx8 s[8:15], s[2:3], s4
> +// CHECK: s_load_dwordx8 s[8:15], s[2:3], s4 ; encoding: [0x04,0x02,0xc4,0xc0]
> +
> +s_load_dwordx16 s[16:31], s[2:3], 1
> +// CHECK: s_load_dwordx16 s[16:31], s[2:3], 0x1 ; encoding: [0x01,0x03,0x08,0xc1]
> +
> +s_load_dwordx16 s[16:31], s[2:3], s4
> +// CHECK: s_load_dwordx16 s[16:31], s[2:3], s4 ; encoding: [0x04,0x02,0x08,0xc1]
> -- 2.0.4
>
> 0005-R600-SI-Assembler-support-for-SOP1-instructions.patch
>
>
> From 8c636e2f929eb2b701c133a406003f8e77cb0d7a Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Fri, 14 Nov 2014 21:24:45 -0500
> Subject: [PATCH 5/5] R600/SI: Assembler support for SOP1 instructions
>
> ---
> docs/R600Usage.rst | 4 +
> lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp | 98 ++++++++++++----
> lib/Target/R600/SIInstrFormats.td | 1 +
> lib/Target/R600/SIInstrInfo.td | 20 +++-
> lib/Target/R600/SIInstructions.td | 24 ++--
> lib/Target/R600/SIRegisterInfo.td | 13 ++-
> test/MC/R600/sop1-err.s | 21 ++++
> test/MC/R600/sop1.s | 157 ++++++++++++++++++++++++++
> 8 files changed, 301 insertions(+), 37 deletions(-)
> create mode 100644 test/MC/R600/sop1-err.s
> create mode 100644 test/MC/R600/sop1.s
>
> diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst
> index 2282d54..5e95b12 100644
> --- a/docs/R600Usage.rst
> +++ b/docs/R600Usage.rst
> @@ -19,6 +19,10 @@ SMRD Instructions
> -----------------
> The assembler currently supports only the s_load_dword* SMRD instructions.
>
> +SOP1 Instructions
> +-----------------
> +All SOP1 instructions are supported.
> +
> SOPP Instructions
> -----------------
>
> diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> index 33cb2bb..862bb3d 100644
> --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> @@ -74,6 +74,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
> Register
> } Kind;
>
> + SMLoc StartLoc, EndLoc;
> +
> public:
> AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
>
> @@ -88,6 +90,7 @@ public:
>
> struct RegOp {
> unsigned RegNo;
> + const MCRegisterInfo *TRI;
> };
>
> union {
> @@ -99,12 +102,22 @@ public:
> void addImmOperands(MCInst &Inst, unsigned N) const {
> Inst.addOperand(MCOperand::CreateImm(getImm()));
> }
> +
> void addRegOperands(MCInst &Inst, unsigned N) const {
> Inst.addOperand(MCOperand::CreateReg(getReg()));
> }
> +
> + void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
> + if (isReg())
> + addRegOperands(Inst, N);
> + else
> + addImmOperands(Inst, N);
> + }
> +
> StringRef getToken() const {
> return StringRef(Tok.Data, Tok.Length);
> }
> +
> bool isToken() const override {
> return Kind == Token;
> }
> @@ -113,6 +126,14 @@ public:
> return Kind == Immediate;
> }
>
> + bool is64BitInlineImm() const {
> + return isImm() && Imm.Val <= -1 && Imm.Val >= -16;
> + }
This doesn't handle the fp64 inline immediate values.
> +
> + bool isImm32Bit() const {
> + return isImm() && isUInt<32>(Imm.Val);
> + }
> +
> int64_t getImm() const {
> return Imm.Val;
> }
> @@ -125,23 +146,42 @@ public:
> return Reg.RegNo;
> }
>
> + bool isRegOrImm() const {
> + return isReg() || isImm();
> + }
> +
> + bool isRegClass(unsigned RCID) const {
> + return Reg.TRI->getRegClass(RCID).contains(getReg());
> + }
> +
> + bool isSSrc32() const {
> + return isImm32Bit() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
> + }
> +
> + bool isSSrc64() const {
> + return isImm32Bit() || is64BitInlineImm() ||
> + (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
> + }
> +
> bool isMem() const override {
> return false;
> }
>
> SMLoc getStartLoc() const override {
> - return SMLoc();
> + return StartLoc;
> }
>
> SMLoc getEndLoc() const override {
> - return SMLoc();
> + return EndLoc;
> }
>
> void print(raw_ostream &OS) const override { }
>
> - static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) {
> + static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc) {
> auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
> Op->Imm.Val = Val;
> + Op->StartLoc = Loc;
> + Op->EndLoc = Loc;
> return Op;
> }
>
> @@ -149,13 +189,19 @@ public:
> auto Res = llvm::make_unique<AMDGPUOperand>(Token);
> Res->Tok.Data = Str.data();
> Res->Tok.Length = Str.size();
> + Res->StartLoc = Loc;
> + Res->EndLoc = Loc;
> return Res;
> }
>
> static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
> - SMLoc E) {
> + SMLoc E,
> + const MCRegisterInfo *TRI) {
> auto Op = llvm::make_unique<AMDGPUOperand>(Register);
> Op->Reg.RegNo = RegNo;
> + Op->Reg.TRI = TRI;
> + Op->StartLoc = S;
> + Op->EndLoc = E;
> return Op;
> }
>
> @@ -282,22 +328,27 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
> MCInst Inst;
>
> switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
> - case Match_Success:
> - Inst.setLoc(IDLoc);
> - Out.EmitInstruction(Inst, STI);
> - return false;
> - case Match_MissingFeature:
> - return Error(IDLoc, "instruction use requires an option to be enabled");
> - case Match_MnemonicFail:
> - return Error(IDLoc, "unrecognized instruction mnemonic");
> - case Match_InvalidOperand: {
> - if (ErrorInfo != ~0ULL) {
> - if (ErrorInfo >= Operands.size())
> - return Error(IDLoc, "too few operands for instruction");
> -
> + default: break;
> + case Match_Success:
> + Inst.setLoc(IDLoc);
> + Out.EmitInstruction(Inst, STI);
> + return false;
> + case Match_MissingFeature:
> + return Error(IDLoc, "instruction use requires an option to be enabled");
> + case Match_MnemonicFail:
> + return Error(IDLoc, "unrecognized instruction mnemonic");
> + case Match_InvalidOperand: {
> + SMLoc ErrorLoc = IDLoc;
> + if (ErrorInfo != ~0ULL) {
> + if (ErrorInfo >= Operands.size())
> + return Error(IDLoc, "too few operands for instruction");
> +
> + }
> + ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
> + if (ErrorLoc == SMLoc())
> + ErrorLoc = IDLoc;
> + return Error(ErrorLoc, "invalid operand for instruction");
> }
> - return Error(IDLoc, "invalid operand for instruction");
> - }
> }
> llvm_unreachable("Implement any new match types added!");
> }
> @@ -319,10 +370,11 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>
> switch(getLexer().getKind()) {
> case AsmToken::Integer: {
> + SMLoc S = Parser.getTok().getLoc();
> int64_t IntVal;
> if (getParser().parseAbsoluteExpression(IntVal))
> return MatchOperand_ParseFail;
> - Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
> + Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
> return MatchOperand_Success;
> }
> case AsmToken::Identifier: {
> @@ -330,7 +382,8 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
> unsigned RegNo;
> if (ParseRegister(RegNo, S, E))
> return MatchOperand_NoMatch;
> - Operands.push_back(AMDGPUOperand::CreateReg(RegNo, S, E));
> + Operands.push_back(
> + AMDGPUOperand::CreateReg(RegNo, S, E, getContext().getRegisterInfo()));
> return MatchOperand_Success;
> }
> default:
> @@ -417,6 +470,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
> // expcnt [6:4]
> // lgkmcnt [10:8]
> int64_t CntVal = 0x77f;
> + SMLoc S = Parser.getTok().getLoc();
>
> switch(getLexer().getKind()) {
> default: return MatchOperand_ParseFail;
> @@ -433,7 +487,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
> } while(getLexer().isNot(AsmToken::EndOfStatement));
> break;
> }
> - Operands.push_back(AMDGPUOperand::CreateImm(CntVal));
> + Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
> return MatchOperand_Success;
> }
>
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 16a35ff..611d41c6 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -213,6 +213,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
> let mayLoad = 0;
> let mayStore = 0;
> let hasSideEffects = 0;
> + let isCodeGenOnly = 0;
> let SALU = 1;
> let SOP1 = 1;
> }
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index ceca467..baaecdc 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -381,12 +381,15 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
> SOP1 <outs, ins, "", pattern>,
> SIMCInstr<opName, SISubtarget.NONE> {
> let isPseudo = 1;
> + let isCodeGenOnly = 1;
> }
>
> class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
> SOP1 <outs, ins, asm, []>,
> SOP1e <op.SI>,
> - SIMCInstr<opName, SISubtarget.SI>;
> + SIMCInstr<opName, SISubtarget.SI> {
> + let isCodeGenOnly = 0;
> +}
>
> class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
> SOP1 <outs, ins, asm, []>,
> @@ -429,6 +432,21 @@ multiclass SOP1_64_0 <sop1 op, string opName, list<dag> pattern> {
> }
> }
>
> +// 64-bit input, no output
> +multiclass SOP1_1 <sop1 op, string opName, list<dag> pattern> {
> + def "" : SOP1_Pseudo <opName, (outs), (ins SReg_64:$src0), pattern>;
> +
> + def _si : SOP1_Real_si <op, opName, (outs), (ins SReg_64:$src0),
> + opName#" $src0"> {
> + let SDST = 0;
> + }
> +
> + def _vi : SOP1_Real_vi <op, opName, (outs), (ins SReg_64:$src0),
> + opName#" $src0"> {
> + let SDST = 0;
> + }
> +}
> +
> // 64-bit input, 32-bit output.
> multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
> op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index db9301d..c9f883a 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -133,28 +133,28 @@ defm S_BREV_B32 : SOP1_32 <sop1<0x0b, 0x08>, "s_brev_b32",
> defm S_BREV_B64 : SOP1_64 <sop1<0x0c, 0x09>, "s_brev_b64", []>;
>
> let Defs = [SCC] in {
> - //defm S_BCNT0_I32_B32 : SOP1_BCNT0 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
> - //defm S_BCNT0_I32_B64 : SOP1_BCNT0 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
> + defm S_BCNT0_I32_B32 : SOP1_32 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
> + defm S_BCNT0_I32_B64 : SOP1_32_64 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
> defm S_BCNT1_I32_B32 : SOP1_32 <sop1<0x0f, 0x0c>, "s_bcnt1_i32_b32",
> [(set i32:$dst, (ctpop i32:$src0))]
> >;
> defm S_BCNT1_I32_B64 : SOP1_32_64 <sop1<0x10, 0x0d>, "s_bcnt1_i32_b64", []>;
> } // End Defs = [SCC]
>
> -//defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
> -//defm S_FF0_I32_B64 : SOP1_FF0 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
> +defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
> +defm S_FF0_I32_B64 : SOP1_32_64 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
> defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32",
> [(set i32:$dst, (cttz_zero_undef i32:$src0))]
> >;
> -////defm S_FF1_I32_B64 : SOP1_FF1 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
> +defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
>
> defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
> [(set i32:$dst, (ctlz_zero_undef i32:$src0))]
> >;
>
> -//defm S_FLBIT_I32_B64 : SOP1_32 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
> +defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
> defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", []>;
> -//defm S_FLBIT_I32_I64 : SOP1_32 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
> +defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
> defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
> [(set i32:$dst, (sext_inreg i32:$src0, i8))]
> >;
> @@ -162,10 +162,10 @@ defm S_SEXT_I32_I16 : SOP1_32 <sop1<0x1a, 0x17>, "s_sext_i32_i16",
> [(set i32:$dst, (sext_inreg i32:$src0, i16))]
> >;
>
> -////defm S_BITSET0_B32 : SOP1_BITSET0 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
> -////defm S_BITSET0_B64 : SOP1_BITSET0 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
> -////defm S_BITSET1_B32 : SOP1_BITSET1 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
> -////defm S_BITSET1_B64 : SOP1_BITSET1 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
> +defm S_BITSET0_B32 : SOP1_32 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
> +defm S_BITSET0_B64 : SOP1_64 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
> +defm S_BITSET1_B32 : SOP1_32 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
> +defm S_BITSET1_B64 : SOP1_64 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
> defm S_GETPC_B64 : SOP1_64_0 <sop1<0x1f, 0x1c>, "s_getpc_b64", []>;
> defm S_SETPC_B64 : SOP1_64 <sop1<0x20, 0x1d>, "s_setpc_b64", []>;
> defm S_SWAPPC_B64 : SOP1_64 <sop1<0x21, 0x1e>, "s_swappc_b64", []>;
> @@ -190,7 +190,7 @@ defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>;
> defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>;
> defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>;
> defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>;
> -//defm S_CBRANCH_JOIN : SOP1_ <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
> +defm S_CBRANCH_JOIN : SOP1_1 <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
> defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>;
> let Defs = [SCC] in {
> defm S_ABS_I32 : SOP1_32 <sop1<0x34, 0x30>, "s_abs_i32", []>;
> diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
> index 3b0971b..3772c7e 100644
> --- a/lib/Target/R600/SIRegisterInfo.td
> +++ b/lib/Target/R600/SIRegisterInfo.td
> @@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
> // Register classes used as source and destination
> //===----------------------------------------------------------------------===//
>
> +class RegImmMatcher<string name> : AsmOperandClass {
> + let Name = name;
> + let RenderMethod = "addRegOrImmOperands";
> +}
> +
> // Special register classes for predicates and the M0 register
> def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
> let CopyCost = -1; // Theoretically it is possible to read from SCC,
> @@ -225,9 +230,13 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
> // SSrc_* Operands with an SGPR or a 32-bit immediate
> //===----------------------------------------------------------------------===//
>
> -def SSrc_32 : RegImmOperand<SReg_32>;
> +def SSrc_32 : RegImmOperand<SReg_32> {
> + let ParserMatchClass = RegImmMatcher<"SSrc32">;
> +}
>
> -def SSrc_64 : RegImmOperand<SReg_64>;
> +def SSrc_64 : RegImmOperand<SReg_64> {
> + let ParserMatchClass = RegImmMatcher<"SSrc64">;
> +}
>
> //===----------------------------------------------------------------------===//
> // SCSrc_* Operands with an SGPR or a inline constant
> diff --git a/test/MC/R600/sop1-err.s b/test/MC/R600/sop1-err.s
> new file mode 100644
> index 0000000..96a4ffe
> --- /dev/null
> +++ b/test/MC/R600/sop1-err.s
> @@ -0,0 +1,21 @@
> +// RUN: not llvm-mc -arch=r600 -mcpu=SI %s 2>&1 | FileCheck %s
> +
> +s_mov_b32 v1, s2
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s1, v0
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b64 s1, s[0:1]
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b64 s[0:1], s1
> +// CHECK: error: invalid operand for instruction
> +
> +// Immediate greater than 32-bits
> +s_mov_b32 s1, 0xfffffffff
> +// CHECK: error: invalid operand for instruction
> +
> +// Immediate greater than 32-bits
> +s_mov_b64 s[0:1], 0xfffffffff
> +// CHECK: error: invalid operand for instruction
> diff --git a/test/MC/R600/sop1.s b/test/MC/R600/sop1.s
> new file mode 100644
> index 0000000..31133e5
> --- /dev/null
> +++ b/test/MC/R600/sop1.s
> @@ -0,0 +1,157 @@
> +// RUN: llvm-mc -arch=r600 -mcpu=SI -show-encoding %s | FileCheck %s
> +
> +s_mov_b32 s1, s2
> +// CHECK: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe]
> +
> +s_mov_b32 s1, 1
> +// CHECK: s_mov_b32 s1, 1 ; encoding: [0x81,0x03,0x81,0xbe]
> +
> +s_mov_b32 s1, 100
> +// CHECK: s_mov_b32 s1, 0x64 ; encoding: [0xff,0x03,0x81,0xbe,0x64,0x00,0x00,0x00]
> +
> +s_mov_b64 s[2:3], s[4:5]
> +// CHECK: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe]
> +
> +s_mov_b64 s[2:3], 0xffffffffffffffff
> +// CHECK: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe]
> +
> +s_cmov_b32 s1, s2
> +// CHECK: s_cmov_b32 s1, s2 ; encoding: [0x02,0x05,0x81,0xbe]
> +
> +s_cmov_b64 s[2:3], s[4:5]
> +// CHECK: s_cmov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x06,0x82,0xbe]
> +
> +s_not_b32 s1, s2
> +// CHECK: s_not_b32 s1, s2 ; encoding: [0x02,0x07,0x81,0xbe]
> +
> +s_not_b64 s[2:3], s[4:5]
> +// CHECK: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x08,0x82,0xbe]
> +
> +s_wqm_b32 s1, s2
> +// CHECK: s_wqm_b32 s1, s2 ; encoding: [0x02,0x09,0x81,0xbe]
> +
> +s_wqm_b64 s[2:3], s[4:5]
> +// CHECK: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0a,0x82,0xbe]
> +
> +s_brev_b32 s1, s2
> +// CHECK: s_brev_b32 s1, s2 ; encoding: [0x02,0x0b,0x81,0xbe]
> +
> +s_brev_b64 s[2:3], s[4:5]
> +// CHECK: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0c,0x82,0xbe]
> +
> +s_bcnt0_i32_b32 s1, s2
> +// CHECK: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0d,0x81,0xbe]
> +
> +s_bcnt0_i32_b64 s1, s[2:3]
> +// CHECK: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0e,0x81,0xbe]
> +
> +s_bcnt1_i32_b32 s1, s2
> +// CHECK: s_bcnt1_i32_b32 s1, s2 ; encoding: [0x02,0x0f,0x81,0xbe]
> +
> +s_bcnt1_i32_b64 s1, s[2:3]
> +// CHECK: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x10,0x81,0xbe]
> +
> +s_ff0_i32_b32 s1, s2
> +// CHECK: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x11,0x81,0xbe]
> +
> +s_ff0_i32_b64 s1, s[2:3]
> +// CHECK: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x12,0x81,0xbe]
> +
> +s_ff1_i32_b32 s1, s2
> +// CHECK: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x13,0x81,0xbe]
> +
> +s_ff1_i32_b64 s1, s[2:3]
> +// CHECK: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x14,0x81,0xbe]
> +
> +s_flbit_i32_b32 s1, s2
> +// CHECK: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x15,0x81,0xbe]
> +
> +s_flbit_i32_b64 s1, s[2:3]
> +// CHECK: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x16,0x81,0xbe]
> +
> +s_flbit_i32 s1, s2
> +// CHECK: s_flbit_i32 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe]
> +
> +s_flbit_i32_i64 s1, s[2:3]
> +// CHECK: s_flbit_i32_i64 s1, s[2:3] ; encoding: [0x02,0x18,0x81,0xbe]
> +
> +s_sext_i32_i8 s1, s2
> +// CHECK: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x19,0x81,0xbe]
> +
> +s_sext_i32_i16 s1, s2
> +// CHECK: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe]
> +
> +s_bitset0_b32 s1, s2
> +// CHECK: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x1b,0x81,0xbe]
> +
> +s_bitset0_b64 s[2:3], s[4:5]
> +// CHECK: s_bitset0_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1c,0x82,0xbe]
> +
> +s_bitset1_b32 s1, s2
> +// CHECK: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1d,0x81,0xbe]
> +
> +s_bitset1_b64 s[2:3], s[4:5]
> +// CHECK: s_bitset1_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1e,0x82,0xbe]
> +
> +s_getpc_b64 s[2:3]
> +// CHECK: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1f,0x82,0xbe]
> +
> +s_setpc_b64 s[2:3], s[4:5]
> +// CHECK: s_setpc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x20,0x82,0xbe]
> +
> +s_swappc_b64 s[2:3], s[4:5]
> +// CHECK: s_swappc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x21,0x82,0xbe]
> +
> +s_rfe_b64 s[2:3], s[4:5]
> +// CHECK: s_rfe_b64 s[2:3], s[4:5] ; encoding: [0x04,0x22,0x82,0xbe]
> +
> +s_and_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe]
> +
> +s_or_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe]
> +
> +s_xor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe]
> +
> +s_andn2_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe]
> +
> +s_orn2_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x28,0x82,0xbe]
> +
> +s_nand_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe]
> +
> +s_nor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2a,0x82,0xbe]
> +
> +s_xnor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe]
> +
> +s_quadmask_b32 s1, s2
> +// CHECK: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe]
> +
> +s_quadmask_b64 s[2:3], s[4:5]
> +// CHECK: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe]
> +
> +s_movrels_b32 s1, s2
> +// CHECK: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2e,0x81,0xbe]
> +
> +s_movrels_b64 s[2:3], s[4:5]
> +// CHECK: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2f,0x82,0xbe]
> +
> +s_movreld_b32 s1, s2
> +// CHECK: s_movreld_b32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe]
> +
> +s_movreld_b64 s[2:3], s[4:5]
> +// CHECK: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x31,0x82,0xbe]
> +
> +s_cbranch_join s[4:5]
> +// CHECK: s_cbranch_join s[4:5] ; encoding: [0x04,0x32,0x80,0xbe]
> +
> +s_abs_i32 s1, s2
> +// CHECK: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe]
> +
> +s_mov_fed_b32 s1, s2
> +// CHECK: s_mov_fed_b32 s1, s2 ; encoding: [0x02,0x35,0x81,0xbe]
> -- 2.0.4
Some test cases with literals for other operands in other instructions might
be useful. The only immediate operands I see are on the mov instructions, as
the first source operand.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150212/3bc0215f/attachment.html>
More information about the llvm-commits
mailing list