PATCHES: R600/SI: Enable SOP1 and s_load_* instructions in the assembler

Thu Feb 12 10:08:39 PST 2015

On 02/11/2015 11:41 AM, Tom Stellard wrote:
> Hi,
>
> These patches add support for more scalar instructions to the assembler.
>
> -Tom
>
> 0001-R600-SI-Remove-some-unused-TableGen-classes.patch
>
>
>  From 2c64f439bcddcd154be74cae2faaccb8920a7b0a Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Thu, 11 Dec 2014 16:18:02 -0500
> Subject: [PATCH 1/5] R600/SI: Remove some unused TableGen classes
>
> ---
>   lib/Target/R600/SIInstrInfo.td | 19 -------------------
>   1 file changed, 19 deletions(-)
>
LGTM
>
> 0002-R600-SI-Lowercase-register-names.patch
>
>
>  From a24954e7cffcac84c0fca15dea128a5f8d083dc5 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 2 Dec 2014 23:33:30 -0500
> Subject: [PATCH 2/5] R600/SI: Lowercase register names
>
> ---
>   lib/Target/R600/SIRegisterInfo.td | 8 ++++----
>   1 file changed, 4 insertions(+), 4 deletions(-)
>
LGTM
>
> 0003-R600-SI-Refactor-SOP1-classes.patch
>
>
>  From c0bbcac32c9c59b29b1eef3397568baa5ad2e11a Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 9 Dec 2014 16:47:37 -0500
> Subject: [PATCH 3/5] R600/SI: Refactor SOP1 classes
>
> ---
>   lib/Target/R600/SIInstrInfo.td | 45 ++++++++++++++++++------------------------
>   1 file changed, 19 insertions(+), 26 deletions(-)
>
LGTM

> 0004-R600-SI-Add-assembler-support-for-s_load_dword-instr.patch
>
>
>  From f87d5c404203037687629c05fddb8a5adc055784 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Fri, 14 Nov 2014 06:22:05 -0500
> Subject: [PATCH 4/5] R600/SI: Add assembler support for s_load_dword*
>   instructions
>
> ---
>   docs/R600Usage.rst                            |   4 +
>   lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp | 161 +++++++++++++++++++++++---
>   lib/Target/R600/SIInstrInfo.td                |   4 +-
>   test/MC/R600/smrd.s                           |  31 +++++
>   4 files changed, 185 insertions(+), 15 deletions(-)
>   create mode 100644 test/MC/R600/smrd.s
>
> diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst
> index 48a30c8..2282d54 100644
> --- a/docs/R600Usage.rst
> +++ b/docs/R600Usage.rst
> @@ -15,6 +15,10 @@ Assembler
>   The assembler is currently a work in progress and not yet complete.  Below
>   are the currently supported features.
>   
> +SMRD Instructions
> +-----------------
> +The assembler currently supports only the s_load_dword* SMRD instructions.
> +
>   SOPP Instructions
>   -----------------
>   
> diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> index 3b4ba1a..33cb2bb 100644
> --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> @@ -27,6 +27,7 @@
>   #include "llvm/Support/SourceMgr.h"
>   #include "llvm/Support/TargetRegistry.h"
>   #include "llvm/Support/raw_ostream.h"
> +#include "llvm/Support/Debug.h"
>   
>   using namespace llvm;
>   
> @@ -69,7 +70,8 @@ public:
>   class AMDGPUOperand : public MCParsedAsmOperand {
>     enum KindTy {
>       Token,
> -    Immediate
> +    Immediate,
> +    Register
>     } Kind;
>   
>   public:
> @@ -84,16 +86,21 @@ public:
>       int64_t Val;
>     };
>   
> +  struct RegOp {
> +    unsigned RegNo;
> +  };
> +
>     union {
>       TokOp Tok;
>       ImmOp Imm;
> +    RegOp Reg;
>     };
>   
>     void addImmOperands(MCInst &Inst, unsigned N) const {
>       Inst.addOperand(MCOperand::CreateImm(getImm()));
>     }
>     void addRegOperands(MCInst &Inst, unsigned N) const {
> -    llvm_unreachable("addRegOperands");
> +    Inst.addOperand(MCOperand::CreateReg(getReg()));
>     }
>     StringRef getToken() const {
>       return StringRef(Tok.Data, Tok.Length);
> @@ -111,11 +118,11 @@ public:
>     }
>   
>     bool isReg() const override {
> -    return false;
> +    return Kind == Register;
>     }
>   
>     unsigned getReg() const override {
> -    return 0;
> +    return Reg.RegNo;
>     }
>   
>     bool isMem() const override {
> @@ -145,13 +152,125 @@ public:
>       return Res;
>     }
>   
> +  static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
> +                                                  SMLoc E) {
> +    auto Op = llvm::make_unique<AMDGPUOperand>(Register);
> +    Op->Reg.RegNo = RegNo;
> +    return Op;
> +  }
> +
>     bool isSWaitCnt() const;
>   };
>   
>   }
>   
> +static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
> +  if (IsVgpr) {
> +    switch (RegWidth) {
> +    default: llvm_unreachable("Unknown register width");
> +    case 1: return AMDGPU::VGPR_32RegClassID;
> +    case 2: return AMDGPU::VReg_64RegClassID;
> +    case 3: return AMDGPU::VReg_96RegClassID;
> +    case 4: return AMDGPU::VReg_128RegClassID;
> +    case 8: return AMDGPU::VReg_256RegClassID;
> +    case 16: return AMDGPU::VReg_512RegClassID;
> +    }
> +  } else {
No need for the else here: every path through the if branch already returns (or hits llvm_unreachable).
> +    switch (RegWidth) {
> +    default: llvm_unreachable("Unknown register width");
> +    case 1: return AMDGPU::SGPR_32RegClassID;
> +    case 2: return AMDGPU::SGPR_64RegClassID;
> +    case 4: return AMDGPU::SReg_128RegClassID;
> +    case 8: return AMDGPU::SReg_256RegClassID;
> +    case 16: return AMDGPU::SReg_512RegClassID;
> +    }
> +  }
> +}
> +
>   bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
> -  return true;
> +  const AsmToken Tok = Parser.getTok();
> +  StartLoc = Tok.getLoc();
> +  EndLoc = Tok.getEndLoc();
> +  const StringRef &RegName = Tok.getString();
> +  RegNo = 0;
> +
> +  // Handle special cases
> +  if (RegName.equals("vcc_lo"))
> +    RegNo = AMDGPU::VCC_LO;
> +  else if (RegName.equals("vcc_hi"))
> +    RegNo = AMDGPU::VCC_HI;
> +  else if (RegName.equals("vcc"))
> +    RegNo = AMDGPU::VCC;
> +  else if (RegName.equals("exec_lo"))
> +    RegNo = AMDGPU::EXEC_LO;
> +  else if (RegName.equals("exec_hi"))
> +    RegNo = AMDGPU::EXEC_HI;
> +  else if (RegName.equals("exec"))
> +    RegNo = AMDGPU::EXEC;
> +  else if (RegName.equals("m0"))
> +    RegNo = AMDGPU::M0;
> +  else if (RegName.equals("flat_scr_lo"))
> +    RegNo = AMDGPU::FLAT_SCR_LO;
> +  else if (RegName.equals("flat_scr_hi"))
> +    RegNo = AMDGPU::FLAT_SCR_HI;
> +  else if (RegName.equals("flat_scr"))
> +    RegNo = AMDGPU::FLAT_SCR;
> +  else if (RegName.equals("scc"))
> +    RegNo = AMDGPU::SCC;
I think this should be split into a separate function, use StringSwitch,
and be sorted so that the common registers come first. For example, vcc and
exec are frequently used, but I've almost never seen their _lo/_hi variants.

> +
> +  if (RegNo)
> +    return false;
> +
> +  // Match vgprs and sgprs
> +  if (RegName[0] != 's' && RegName[0] != 'v')
> +    return true;
> +
> +  bool IsVgpr = RegName[0] == 'v';
> +  unsigned RegWidth;
> +  unsigned RegIndexInClass;
> +  if (RegName.size() > 1) {
> +    // We have a 32-bit register
> +    RegWidth = 1;
> +    if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
> +      return true;
> +    Parser.Lex();
> +  } else {
> +    // We have a register greater than 32-bits.
> +
> +    int64_t RegLo, RegHi;
> +    Parser.Lex();
> +    if (getLexer().isNot(AsmToken::LBrac))
> +      return true;
> +
> +    Parser.Lex();
> +    if (getParser().parseAbsoluteExpression(RegLo))
> +      return true;
> +
> +    if (getLexer().isNot(AsmToken::Colon))
> +      return true;
> +
> +    Parser.Lex();
> +    if (getParser().parseAbsoluteExpression(RegHi))
> +      return true;
> +
> +    if (getLexer().isNot(AsmToken::RBrac))
> +      return true;
> +
> +    Parser.Lex();
> +    RegWidth = (RegHi - RegLo) + 1;
> +    if (IsVgpr) {
> +      // VGPR registers aren't aligned.
> +      RegIndexInClass = RegLo;
> +    } else {
> +      // SGPR registers are aligned.  Max alignment is 4 dwords.
> +      RegIndexInClass = RegLo / std::min(RegWidth, 4u);
> +    }
> +  }
> +
> +  const MCRegisterInfo *TRC = getContext().getRegisterInfo();
> +  unsigned RC = getRegClass(IsVgpr, RegWidth);
> +  RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
> +  return false;
>   }
>   
>   
> @@ -206,6 +325,14 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>         Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
>         return MatchOperand_Success;
>       }
> +    case AsmToken::Identifier: {
> +      SMLoc S, E;
> +      unsigned RegNo;
> +      if (ParseRegister(RegNo, S, E))
> +        return MatchOperand_NoMatch;
> +      Operands.push_back(AMDGPUOperand::CreateReg(RegNo, S, E));
> +      return MatchOperand_Success;
> +    }
>       default:
>         return MatchOperand_NoMatch;
>     }
> @@ -217,17 +344,23 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
>     // Add the instruction mnemonic
>     Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
>   
> -  if (getLexer().is(AsmToken::EndOfStatement))
> -    return false;
> +  while (!getLexer().is(AsmToken::EndOfStatement)) {
>   
> -  AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
> -  switch (Res) {
> -    case MatchOperand_Success: return false;
> -    case MatchOperand_ParseFail: return Error(NameLoc,
> -                                              "Failed parsing operand");
> -    case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand");
> +    AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
> +
> +    // Eat the comma if there is one.
> +    if (getLexer().is(AsmToken::Comma))
> +      Parser.Lex();
> +
> +    switch (Res) {
> +      case MatchOperand_Success: break;
> +      case MatchOperand_ParseFail: return Error(getLexer().getLoc(),
> +                                                "failed parsing operand.");
> +      case MatchOperand_NoMatch: return Error(getLexer().getLoc(),
> +                                              "not a valid operand.");
> +    }
>     }
> -  return true;
> +  return false;
>   }
>   
>   //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 3dfde3a..ceca467 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -575,7 +575,9 @@ multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
>   
>     def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
>   
> -  def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
> +  let isCodeGenOnly = 0 in {
> +    def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
> +  }
>   
>     def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>;
>   }
> diff --git a/test/MC/R600/smrd.s b/test/MC/R600/smrd.s
> new file mode 100644
> index 0000000..d511c42
> --- /dev/null
> +++ b/test/MC/R600/smrd.s
> @@ -0,0 +1,31 @@
> +// RUN: llvm-mc -arch=r600 -mcpu=SI  -show-encoding %s | FileCheck %s
> +
> +s_load_dword s1, s[2:3], 1
> +// CHECK: s_load_dword s1, s[2:3], 0x1 ; encoding: [0x01,0x83,0x00,0xc0]
> +
> +s_load_dword s1, s[2:3], s4
> +// CHECK: s_load_dword s1, s[2:3], s4 ; encoding: [0x04,0x82,0x00,0xc0]
> +
> +s_load_dwordx2 s[2:3], s[2:3], 1
> +// CHECK: s_load_dwordx2 s[2:3], s[2:3], 0x1 ; encoding: [0x01,0x03,0x41,0xc0]
> +
> +s_load_dwordx2 s[2:3], s[2:3], s4
> +// CHECK: s_load_dwordx2 s[2:3], s[2:3], s4 ; encoding: [0x04,0x02,0x41,0xc0]
> +
> +s_load_dwordx4 s[4:7], s[2:3], 1
> +// CHECK: s_load_dwordx4 s[4:7], s[2:3], 0x1 ; encoding: [0x01,0x03,0x82,0xc0]
> +
> +s_load_dwordx4 s[4:7], s[2:3], s4
> +// CHECK: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x04,0x02,0x82,0xc0]
> +
> +s_load_dwordx8 s[8:15], s[2:3], 1
> +// CHECK: s_load_dwordx8 s[8:15], s[2:3], 0x1 ; encoding: [0x01,0x03,0xc4,0xc0]
> +
> +s_load_dwordx8 s[8:15], s[2:3], s4
> +// CHECK: s_load_dwordx8 s[8:15], s[2:3], s4 ; encoding: [0x04,0x02,0xc4,0xc0]
> +
> +s_load_dwordx16 s[16:31], s[2:3], 1
> +// CHECK: s_load_dwordx16 s[16:31], s[2:3], 0x1 ; encoding: [0x01,0x03,0x08,0xc1]
> +
> +s_load_dwordx16 s[16:31], s[2:3], s4
> +// CHECK: s_load_dwordx16 s[16:31], s[2:3], s4 ; encoding: [0x04,0x02,0x08,0xc1]
> -- 2.0.4
>
> 0005-R600-SI-Assembler-support-for-SOP1-instructions.patch
>
>
>  From 8c636e2f929eb2b701c133a406003f8e77cb0d7a Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Fri, 14 Nov 2014 21:24:45 -0500
> Subject: [PATCH 5/5] R600/SI: Assembler support for SOP1 instructions
>
> ---
>   docs/R600Usage.rst                            |   4 +
>   lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp |  98 ++++++++++++----
>   lib/Target/R600/SIInstrFormats.td             |   1 +
>   lib/Target/R600/SIInstrInfo.td                |  20 +++-
>   lib/Target/R600/SIInstructions.td             |  24 ++--
>   lib/Target/R600/SIRegisterInfo.td             |  13 ++-
>   test/MC/R600/sop1-err.s                       |  21 ++++
>   test/MC/R600/sop1.s                           | 157 ++++++++++++++++++++++++++
>   8 files changed, 301 insertions(+), 37 deletions(-)
>   create mode 100644 test/MC/R600/sop1-err.s
>   create mode 100644 test/MC/R600/sop1.s
>
> diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst
> index 2282d54..5e95b12 100644
> --- a/docs/R600Usage.rst
> +++ b/docs/R600Usage.rst
> @@ -19,6 +19,10 @@ SMRD Instructions
>   -----------------
>   The assembler currently supports only the s_load_dword* SMRD instructions.
>   
> +SOP1 Instructions
> +-----------------
> +All SOP1 instructions are supported.
> +
>   SOPP Instructions
>   -----------------
>   
> diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> index 33cb2bb..862bb3d 100644
> --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
> @@ -74,6 +74,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
>       Register
>     } Kind;
>   
> +  SMLoc StartLoc, EndLoc;
> +
>   public:
>     AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
>   
> @@ -88,6 +90,7 @@ public:
>   
>     struct RegOp {
>       unsigned RegNo;
> +    const MCRegisterInfo *TRI;
>     };
>   
>     union {
> @@ -99,12 +102,22 @@ public:
>     void addImmOperands(MCInst &Inst, unsigned N) const {
>       Inst.addOperand(MCOperand::CreateImm(getImm()));
>     }
> +
>     void addRegOperands(MCInst &Inst, unsigned N) const {
>       Inst.addOperand(MCOperand::CreateReg(getReg()));
>     }
> +
> +  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
> +    if (isReg())
> +      addRegOperands(Inst, N);
> +    else
> +      addImmOperands(Inst, N);
> +  }
> +
>     StringRef getToken() const {
>       return StringRef(Tok.Data, Tok.Length);
>     }
> +
>     bool isToken() const override {
>       return Kind == Token;
>     }
> @@ -113,6 +126,14 @@ public:
>       return Kind == Immediate;
>     }
>   
> +  bool is64BitInlineImm() const {
> +    return isImm() && Imm.Val <= -1 && Imm.Val >= -16;
> +  }
This doesn't handle the fp64 inline immediate values; is64BitInlineImm() only accepts the integers -16 through -1.
> +
> +  bool isImm32Bit() const {
> +    return isImm() && isUInt<32>(Imm.Val);
> +  }
> +
>     int64_t getImm() const {
>       return Imm.Val;
>     }
> @@ -125,23 +146,42 @@ public:
>       return Reg.RegNo;
>     }
>   
> +  bool isRegOrImm() const {
> +    return isReg() || isImm();
> +  }
> +
> +  bool isRegClass(unsigned RCID) const {
> +    return Reg.TRI->getRegClass(RCID).contains(getReg());
> +  }
> +
> +  bool isSSrc32() const {
> +    return isImm32Bit() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
> +  }
> +
> +  bool isSSrc64() const {
> +    return isImm32Bit() || is64BitInlineImm() ||
> +           (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
> +  }
> +
>     bool isMem() const override {
>       return false;
>     }
>   
>     SMLoc getStartLoc() const override {
> -    return SMLoc();
> +    return StartLoc;
>     }
>   
>     SMLoc getEndLoc() const override {
> -    return SMLoc();
> +    return EndLoc;
>     }
>   
>     void print(raw_ostream &OS) const override { }
>   
> -  static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) {
> +  static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc) {
>       auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
>       Op->Imm.Val = Val;
> +    Op->StartLoc = Loc;
> +    Op->EndLoc = Loc;
>       return Op;
>     }
>   
> @@ -149,13 +189,19 @@ public:
>       auto Res = llvm::make_unique<AMDGPUOperand>(Token);
>       Res->Tok.Data = Str.data();
>       Res->Tok.Length = Str.size();
> +    Res->StartLoc = Loc;
> +    Res->EndLoc = Loc;
>       return Res;
>     }
>   
>     static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
> -                                                  SMLoc E) {
> +                                                  SMLoc E,
> +                                                  const MCRegisterInfo *TRI) {
>       auto Op = llvm::make_unique<AMDGPUOperand>(Register);
>       Op->Reg.RegNo = RegNo;
> +    Op->Reg.TRI = TRI;
> +    Op->StartLoc = S;
> +    Op->EndLoc = E;
>       return Op;
>     }
>   
> @@ -282,22 +328,27 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
>     MCInst Inst;
>   
>     switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
> -  case Match_Success:
> -    Inst.setLoc(IDLoc);
> -    Out.EmitInstruction(Inst, STI);
> -    return false;
> -  case Match_MissingFeature:
> -    return Error(IDLoc, "instruction use requires an option to be enabled");
> -  case Match_MnemonicFail:
> -    return Error(IDLoc, "unrecognized instruction mnemonic");
> -  case Match_InvalidOperand: {
> -    if (ErrorInfo != ~0ULL) {
> -      if (ErrorInfo >= Operands.size())
> -        return Error(IDLoc, "too few operands for instruction");
> -
> +    default: break;
> +    case Match_Success:
> +      Inst.setLoc(IDLoc);
> +      Out.EmitInstruction(Inst, STI);
> +      return false;
> +    case Match_MissingFeature:
> +      return Error(IDLoc, "instruction use requires an option to be enabled");
> +    case Match_MnemonicFail:
> +        return Error(IDLoc, "unrecognized instruction mnemonic");
> +    case Match_InvalidOperand: {
> +      SMLoc ErrorLoc = IDLoc;
> +      if (ErrorInfo != ~0ULL) {
> +        if (ErrorInfo >= Operands.size())
> +          return Error(IDLoc, "too few operands for instruction");
> +
> +      }
> +      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
> +      if (ErrorLoc == SMLoc())
> +        ErrorLoc = IDLoc;
> +      return Error(ErrorLoc, "invalid operand for instruction");
>       }
> -    return Error(IDLoc, "invalid operand for instruction");
> -  }
>     }
>     llvm_unreachable("Implement any new match types added!");
>   }
> @@ -319,10 +370,11 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>   
>     switch(getLexer().getKind()) {
>       case AsmToken::Integer: {
> +      SMLoc S = Parser.getTok().getLoc();
>         int64_t IntVal;
>         if (getParser().parseAbsoluteExpression(IntVal))
>           return MatchOperand_ParseFail;
> -      Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
> +      Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
>         return MatchOperand_Success;
>       }
>       case AsmToken::Identifier: {
> @@ -330,7 +382,8 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
>         unsigned RegNo;
>         if (ParseRegister(RegNo, S, E))
>           return MatchOperand_NoMatch;
> -      Operands.push_back(AMDGPUOperand::CreateReg(RegNo, S, E));
> +      Operands.push_back(
> +          AMDGPUOperand::CreateReg(RegNo, S, E, getContext().getRegisterInfo()));
>         return MatchOperand_Success;
>       }
>       default:
> @@ -417,6 +470,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
>     // expcnt  [6:4]
>     // lgkmcnt [10:8]
>     int64_t CntVal = 0x77f;
> +  SMLoc S = Parser.getTok().getLoc();
>   
>     switch(getLexer().getKind()) {
>       default: return MatchOperand_ParseFail;
> @@ -433,7 +487,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
>         } while(getLexer().isNot(AsmToken::EndOfStatement));
>         break;
>     }
> -  Operands.push_back(AMDGPUOperand::CreateImm(CntVal));
> +  Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
>     return MatchOperand_Success;
>   }
>   
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 16a35ff..611d41c6 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -213,6 +213,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
>     let mayLoad = 0;
>     let mayStore = 0;
>     let hasSideEffects = 0;
> +  let isCodeGenOnly = 0;
>     let SALU = 1;
>     let SOP1 = 1;
>   }
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index ceca467..baaecdc 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -381,12 +381,15 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
>     SOP1 <outs, ins, "", pattern>,
>     SIMCInstr<opName, SISubtarget.NONE> {
>     let isPseudo = 1;
> +  let isCodeGenOnly = 1;
>   }
>   
>   class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
>     SOP1 <outs, ins, asm, []>,
>     SOP1e <op.SI>,
> -  SIMCInstr<opName, SISubtarget.SI>;
> +  SIMCInstr<opName, SISubtarget.SI> {
> +  let isCodeGenOnly = 0;
> +}
>   
>   class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
>     SOP1 <outs, ins, asm, []>,
> @@ -429,6 +432,21 @@ multiclass SOP1_64_0 <sop1 op, string opName, list<dag> pattern> {
>     }
>   }
>   
> +// 64-bit input, no output
> +multiclass SOP1_1 <sop1 op, string opName, list<dag> pattern> {
> +  def "" : SOP1_Pseudo <opName, (outs), (ins SReg_64:$src0), pattern>;
> +
> +  def _si : SOP1_Real_si <op, opName, (outs), (ins SReg_64:$src0),
> +    opName#" $src0"> {
> +    let SDST = 0;
> +  }
> +
> +  def _vi : SOP1_Real_vi <op, opName, (outs), (ins SReg_64:$src0),
> +    opName#" $src0"> {
> +    let SDST = 0;
> +  }
> +}
> +
>   // 64-bit input, 32-bit output.
>   multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
>       op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index db9301d..c9f883a 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -133,28 +133,28 @@ defm S_BREV_B32 : SOP1_32 <sop1<0x0b, 0x08>, "s_brev_b32",
>   defm S_BREV_B64 : SOP1_64 <sop1<0x0c, 0x09>, "s_brev_b64", []>;
>   
>   let Defs = [SCC] in {
> -  //defm S_BCNT0_I32_B32 : SOP1_BCNT0 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
> -  //defm S_BCNT0_I32_B64 : SOP1_BCNT0 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
> +  defm S_BCNT0_I32_B32 : SOP1_32 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
> +  defm S_BCNT0_I32_B64 : SOP1_32_64 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
>     defm S_BCNT1_I32_B32 : SOP1_32 <sop1<0x0f, 0x0c>, "s_bcnt1_i32_b32",
>       [(set i32:$dst, (ctpop i32:$src0))]
>     >;
>     defm S_BCNT1_I32_B64 : SOP1_32_64 <sop1<0x10, 0x0d>, "s_bcnt1_i32_b64", []>;
>   } // End Defs = [SCC]
>   
> -//defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
> -//defm S_FF0_I32_B64 : SOP1_FF0 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
> +defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
> +defm S_FF0_I32_B64 : SOP1_32_64 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
>   defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32",
>     [(set i32:$dst, (cttz_zero_undef i32:$src0))]
>   >;
> -////defm S_FF1_I32_B64 : SOP1_FF1 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
> +defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
>   
>   defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
>     [(set i32:$dst, (ctlz_zero_undef i32:$src0))]
>   >;
>   
> -//defm S_FLBIT_I32_B64 : SOP1_32 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
> +defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
>   defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", []>;
> -//defm S_FLBIT_I32_I64 : SOP1_32 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
> +defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
>   defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
>     [(set i32:$dst, (sext_inreg i32:$src0, i8))]
>   >;
> @@ -162,10 +162,10 @@ defm S_SEXT_I32_I16 : SOP1_32 <sop1<0x1a, 0x17>, "s_sext_i32_i16",
>     [(set i32:$dst, (sext_inreg i32:$src0, i16))]
>   >;
>   
> -////defm S_BITSET0_B32 : SOP1_BITSET0 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
> -////defm S_BITSET0_B64 : SOP1_BITSET0 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
> -////defm S_BITSET1_B32 : SOP1_BITSET1 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
> -////defm S_BITSET1_B64 : SOP1_BITSET1 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
> +defm S_BITSET0_B32 : SOP1_32 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
> +defm S_BITSET0_B64 : SOP1_64 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
> +defm S_BITSET1_B32 : SOP1_32 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
> +defm S_BITSET1_B64 : SOP1_64 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
>   defm S_GETPC_B64 : SOP1_64_0 <sop1<0x1f, 0x1c>, "s_getpc_b64", []>;
>   defm S_SETPC_B64 : SOP1_64 <sop1<0x20, 0x1d>, "s_setpc_b64", []>;
>   defm S_SWAPPC_B64 : SOP1_64 <sop1<0x21, 0x1e>, "s_swappc_b64", []>;
> @@ -190,7 +190,7 @@ defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>;
>   defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>;
>   defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>;
>   defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>;
> -//defm S_CBRANCH_JOIN : SOP1_ <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
> +defm S_CBRANCH_JOIN : SOP1_1 <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
>   defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>;
>   let Defs = [SCC] in {
>     defm S_ABS_I32 : SOP1_32 <sop1<0x34, 0x30>, "s_abs_i32", []>;
> diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
> index 3b0971b..3772c7e 100644
> --- a/lib/Target/R600/SIRegisterInfo.td
> +++ b/lib/Target/R600/SIRegisterInfo.td
> @@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
>   //  Register classes used as source and destination
>   //===----------------------------------------------------------------------===//
>   
> +class RegImmMatcher<string name> : AsmOperandClass {
> +  let Name = name;
> +  let RenderMethod = "addRegOrImmOperands";
> +}
> +
>   // Special register classes for predicates and the M0 register
>   def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
>     let CopyCost = -1; // Theoretically it is possible to read from SCC,
> @@ -225,9 +230,13 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
>   //  SSrc_* Operands with an SGPR or a 32-bit immediate
>   //===----------------------------------------------------------------------===//
>   
> -def SSrc_32 : RegImmOperand<SReg_32>;
> +def SSrc_32 : RegImmOperand<SReg_32> {
> +  let ParserMatchClass = RegImmMatcher<"SSrc32">;
> +}
>   
> -def SSrc_64 : RegImmOperand<SReg_64>;
> +def SSrc_64 : RegImmOperand<SReg_64> {
> +  let ParserMatchClass = RegImmMatcher<"SSrc64">;
> +}
>   
>   //===----------------------------------------------------------------------===//
>   //  SCSrc_* Operands with an SGPR or a inline constant
> diff --git a/test/MC/R600/sop1-err.s b/test/MC/R600/sop1-err.s
> new file mode 100644
> index 0000000..96a4ffe
> --- /dev/null
> +++ b/test/MC/R600/sop1-err.s
> @@ -0,0 +1,21 @@
> +// RUN: not llvm-mc -arch=r600 -mcpu=SI %s 2>&1 | FileCheck %s
> +
> +s_mov_b32 v1, s2
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b32 s1, v0
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b64 s1, s[0:1]
> +// CHECK: error: invalid operand for instruction
> +
> +s_mov_b64 s[0:1], s1
> +// CHECK: error: invalid operand for instruction
> +
> +// Immediate greater than 32-bits
> +s_mov_b32 s1, 0xfffffffff
> +// CHECK: error: invalid operand for instruction
> +
> +// Immediate greater than 32-bits
> +s_mov_b64 s[0:1], 0xfffffffff
> +// CHECK: error: invalid operand for instruction
> diff --git a/test/MC/R600/sop1.s b/test/MC/R600/sop1.s
> new file mode 100644
> index 0000000..31133e5
> --- /dev/null
> +++ b/test/MC/R600/sop1.s
> @@ -0,0 +1,157 @@
> +// RUN: llvm-mc -arch=r600 -mcpu=SI  -show-encoding %s | FileCheck %s
> +
> +s_mov_b32 s1, s2
> +// CHECK: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe]
> +
> +s_mov_b32 s1, 1
> +// CHECK: s_mov_b32 s1, 1 ; encoding: [0x81,0x03,0x81,0xbe]
> +
> +s_mov_b32 s1, 100
> +// CHECK: s_mov_b32 s1, 0x64 ; encoding: [0xff,0x03,0x81,0xbe,0x64,0x00,0x00,0x00]
> +
> +s_mov_b64 s[2:3], s[4:5]
> +// CHECK: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe]
> +
> +s_mov_b64 s[2:3], 0xffffffffffffffff
> +// CHECK: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe]
> +
> +s_cmov_b32 s1, s2
> +// CHECK: s_cmov_b32 s1, s2 ; encoding: [0x02,0x05,0x81,0xbe]
> +
> +s_cmov_b64 s[2:3], s[4:5]
> +// CHECK: s_cmov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x06,0x82,0xbe]
> +
> +s_not_b32 s1, s2
> +// CHECK: s_not_b32 s1, s2 ; encoding: [0x02,0x07,0x81,0xbe]
> +
> +s_not_b64 s[2:3], s[4:5]
> +// CHECK: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x08,0x82,0xbe]
> +
> +s_wqm_b32 s1, s2
> +// CHECK: s_wqm_b32 s1, s2 ; encoding: [0x02,0x09,0x81,0xbe]
> +
> +s_wqm_b64 s[2:3], s[4:5]
> +// CHECK: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0a,0x82,0xbe]
> +
> +s_brev_b32 s1, s2
> +// CHECK: s_brev_b32 s1, s2 ; encoding: [0x02,0x0b,0x81,0xbe]
> +
> +s_brev_b64 s[2:3], s[4:5]
> +// CHECK: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0c,0x82,0xbe]
> +
> +s_bcnt0_i32_b32 s1, s2
> +// CHECK: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0d,0x81,0xbe]
> +
> +s_bcnt0_i32_b64 s1, s[2:3]
> +// CHECK: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0e,0x81,0xbe]
> +
> +s_bcnt1_i32_b32 s1, s2
> +// CHECK: s_bcnt1_i32_b32 s1, s2 ; encoding: [0x02,0x0f,0x81,0xbe]
> +
> +s_bcnt1_i32_b64 s1, s[2:3]
> +// CHECK: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x10,0x81,0xbe]
> +
> +s_ff0_i32_b32 s1, s2
> +// CHECK: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x11,0x81,0xbe]
> +
> +s_ff0_i32_b64 s1, s[2:3]
> +// CHECK: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x12,0x81,0xbe]
> +
> +s_ff1_i32_b32 s1, s2
> +// CHECK: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x13,0x81,0xbe]
> +
> +s_ff1_i32_b64 s1, s[2:3]
> +// CHECK: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x14,0x81,0xbe]
> +
> +s_flbit_i32_b32 s1, s2
> +// CHECK: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x15,0x81,0xbe]
> +
> +s_flbit_i32_b64 s1, s[2:3]
> +// CHECK: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x16,0x81,0xbe]
> +
> +s_flbit_i32 s1, s2
> +// CHECK: s_flbit_i32 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe]
> +
> +s_flbit_i32_i64 s1, s[2:3]
> +// CHECK: s_flbit_i32_i64 s1, s[2:3] ; encoding: [0x02,0x18,0x81,0xbe]
> +
> +s_sext_i32_i8 s1, s2
> +// CHECK: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x19,0x81,0xbe]
> +
> +s_sext_i32_i16 s1, s2
> +// CHECK: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe]
> +
> +s_bitset0_b32 s1, s2
> +// CHECK: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x1b,0x81,0xbe]
> +
> +s_bitset0_b64 s[2:3], s[4:5]
> +// CHECK: s_bitset0_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1c,0x82,0xbe]
> +
> +s_bitset1_b32 s1, s2
> +// CHECK: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1d,0x81,0xbe]
> +
> +s_bitset1_b64 s[2:3], s[4:5]
> +// CHECK: s_bitset1_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1e,0x82,0xbe]
> +
> +s_getpc_b64 s[2:3]
> +// CHECK: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1f,0x82,0xbe]
> +
> +s_setpc_b64 s[2:3], s[4:5]
> +// CHECK: s_setpc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x20,0x82,0xbe]
> +
> +s_swappc_b64 s[2:3], s[4:5]
> +// CHECK: s_swappc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x21,0x82,0xbe]
> +
> +s_rfe_b64 s[2:3], s[4:5]
> +// CHECK: s_rfe_b64 s[2:3], s[4:5] ; encoding: [0x04,0x22,0x82,0xbe]
> +
> +s_and_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe]
> +
> +s_or_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe]
> +
> +s_xor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe]
> +
> +s_andn2_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe]
> +
> +s_orn2_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x28,0x82,0xbe]
> +
> +s_nand_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe]
> +
> +s_nor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2a,0x82,0xbe]
> +
> +s_xnor_saveexec_b64 s[2:3], s[4:5]
> +// CHECK: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe]
> +
> +s_quadmask_b32 s1, s2
> +// CHECK: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe]
> +
> +s_quadmask_b64 s[2:3], s[4:5]
> +// CHECK: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe]
> +
> +s_movrels_b32 s1, s2
> +// CHECK: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2e,0x81,0xbe]
> +
> +s_movrels_b64 s[2:3], s[4:5]
> +// CHECK: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2f,0x82,0xbe]
> +
> +s_movreld_b32 s1, s2
> +// CHECK: s_movreld_b32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe]
> +
> +s_movreld_b64 s[2:3], s[4:5]
> +// CHECK: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x31,0x82,0xbe]
> +
> +s_cbranch_join s[4:5]
> +// CHECK: s_cbranch_join s[4:5] ; encoding: [0x04,0x32,0x80,0xbe]
> +
> +s_abs_i32 s1, s2
> +// CHECK: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe]
> +
> +s_mov_fed_b32 s1, s2
> +// CHECK: s_mov_fed_b32 s1, s2 ; encoding: [0x02,0x35,0x81,0xbe]
> -- 2.0.4
Some test cases with literals for other operands in other instructions might
be useful. The only immediate operands I see in these tests are on the mov
instructions, as the first source operand.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150212/3bc0215f/attachment.html>