[llvm] r267553 - [AMDGPU] Assembler: basic support for SDWA instructions

Sam Kolton via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 26 06:33:57 PDT 2016


Author: skolton
Date: Tue Apr 26 08:33:56 2016
New Revision: 267553

URL: http://llvm.org/viewvc/llvm-project?rev=267553&view=rev
Log:
[AMDGPU] Assembler: basic support for SDWA instructions

Support for SDWA instructions for VOP1 and VOP2 encoding.
Not done yet:
  - converters to support optional operands and modifiers
  - VOPC
  - sext() modifier
  - intrinsics
  - VOP2b (see vop_dpp.s)
  - V_MAC_F32 (see vop_dpp.s)

Differential Revision: http://reviews.llvm.org/D19360

Added:
    llvm/trunk/test/MC/AMDGPU/vop_sdwa.s
Modified:
    llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
    llvm/trunk/lib/Target/AMDGPU/SIDefines.h
    llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
    llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td

Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=267553&r1=267552&r2=267553&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Tue Apr 26 08:33:56 2016
@@ -74,6 +74,8 @@ public:
     ImmTyDppRowMask,
     ImmTyDppBankMask,
     ImmTyDppBoundCtrl,
+    ImmTySdwaSel,
+    ImmTySdwaDstUnused,
     ImmTyDMask,
     ImmTyUNorm,
     ImmTyDA,
@@ -253,6 +255,14 @@ public:
     return isImmTy(ImmTyDppBoundCtrl);
   }
 
+  bool isSDWASel() const {
+    return isImmTy(ImmTySdwaSel);
+  }
+
+  bool isSDWADstUnused() const {
+    return isImmTy(ImmTySdwaDstUnused);
+  }
+
   void setModifiers(unsigned Mods) {
     assert(isReg() || (isImm() && Imm.Modifiers == 0));
     if (isReg())
@@ -522,6 +532,7 @@ public:
   OperandMatchResultTy parseOptionalOps(
                                    const ArrayRef<OptionalOperand> &OptionalOps,
                                    OperandVector &Operands);
+  OperandMatchResultTy parseStringWithPrefix(const char *Prefix, StringRef &Value);
 
 
   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
@@ -569,6 +580,9 @@ public:
   void cvtDPP_mod(MCInst &Inst, const OperandVector &Operands);
   void cvtDPP_nomod(MCInst &Inst, const OperandVector &Operands);
   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool HasMods);
+
+  OperandMatchResultTy parseSDWASel(OperandVector &Operands);
+  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
 };
 
 struct OptionalOperand {
@@ -1396,6 +1410,30 @@ AMDGPUAsmParser::parseOptionalOps(const
   return MatchOperand_NoMatch;
 }
 
+AMDGPUAsmParser::OperandMatchResultTy 
+AMDGPUAsmParser::parseStringWithPrefix(const char *Prefix, StringRef &Value) {
+  if (getLexer().isNot(AsmToken::Identifier)) {
+    return MatchOperand_NoMatch;
+  }
+  StringRef Tok = Parser.getTok().getString();
+  if (Tok != Prefix) {
+    return MatchOperand_NoMatch;
+  }
+
+  Parser.Lex();
+  if (getLexer().isNot(AsmToken::Colon)) {
+    return MatchOperand_ParseFail;
+  }
+    
+  Parser.Lex();
+  if (getLexer().isNot(AsmToken::Identifier)) {
+    return MatchOperand_ParseFail;
+  }
+
+  Value = Parser.getTok().getString();
+  return MatchOperand_Success;
+}
+
 //===----------------------------------------------------------------------===//
 // ds
 //===----------------------------------------------------------------------===//
@@ -2296,6 +2334,80 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Ins
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
 }
 
+//===----------------------------------------------------------------------===//
+// sdwa
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSDWASel(OperandVector &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  StringRef Value;
+  AMDGPUAsmParser::OperandMatchResultTy res;
+  
+  res = parseStringWithPrefix("dst_sel", Value);
+  if (res == MatchOperand_ParseFail) {
+    return MatchOperand_ParseFail;
+  } else if (res == MatchOperand_NoMatch) {
+    res = parseStringWithPrefix("src0_sel", Value);
+    if (res == MatchOperand_ParseFail) {
+      return MatchOperand_ParseFail;
+    } else if (res == MatchOperand_NoMatch) {
+      res = parseStringWithPrefix("src1_sel", Value);
+      if (res != MatchOperand_Success) {
+        return res;
+      }
+    }
+  }
+  
+  int64_t Int;
+  Int = StringSwitch<int64_t>(Value)
+        .Case("BYTE_0", 0)
+        .Case("BYTE_1", 1)
+        .Case("BYTE_2", 2)
+        .Case("BYTE_3", 3)
+        .Case("WORD_0", 4)
+        .Case("WORD_1", 5)
+        .Case("DWORD", 6)
+        .Default(0xffffffff);
+  Parser.Lex(); // eat last token
+
+  if (Int == 0xffffffff) {
+    return MatchOperand_ParseFail;
+  }
+
+  Operands.push_back(AMDGPUOperand::CreateImm(Int, S,
+                                              AMDGPUOperand::ImmTySdwaSel));
+  return MatchOperand_Success;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy 
+AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  StringRef Value;
+  AMDGPUAsmParser::OperandMatchResultTy res;
+
+  res = parseStringWithPrefix("dst_unused", Value);
+  if (res != MatchOperand_Success) {
+    return res;
+  }
+
+  int64_t Int;
+  Int = StringSwitch<int64_t>(Value)
+        .Case("UNUSED_PAD", 0)
+        .Case("UNUSED_SEXT", 1)
+        .Case("UNUSED_PRESERVE", 2)
+        .Default(0xffffffff);
+  Parser.Lex(); // eat last token
+
+  if (Int == 0xffffffff) {
+    return MatchOperand_ParseFail;
+  }
+
+  Operands.push_back(AMDGPUOperand::CreateImm(Int, S,
+                                              AMDGPUOperand::ImmTySdwaDstUnused));
+  return MatchOperand_Success;
+}
+
 
 /// Force static initialization.
 extern "C" void LLVMInitializeAMDGPUAsmParser() {

Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=267553&r1=267552&r2=267553&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Tue Apr 26 08:33:56 2016
@@ -282,6 +282,8 @@ void AMDGPUInstPrinter::printVOPDst(cons
     O << "_e64 ";
   else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::DPP)
     O << "_dpp ";
+  else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SDWA)
+    O << "_sdwa ";
   else
     O << "_e32 ";
 
@@ -479,6 +481,51 @@ void AMDGPUInstPrinter::printBoundCtrlOp
   }
 }
 
+void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O) {
+  unsigned Imm = MI->getOperand(OpNo).getImm();
+  switch (Imm) {
+  case 0: O << "BYTE_0"; break;
+  case 1: O << "BYTE_1"; break;
+  case 2: O << "BYTE_2"; break;
+  case 3: O << "BYTE_3"; break;
+  case 4: O << "WORD_0"; break;
+  case 5: O << "WORD_1"; break;
+  case 6: O << "DWORD"; break;
+  default: llvm_unreachable("Invalid SDWA data select operand");
+  }
+}
+
+void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  O << "dst_sel:";
+  printSDWASel(MI, OpNo, O);
+}
+
+void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
+                                         raw_ostream &O) {
+  O << "src0_sel:";
+  printSDWASel(MI, OpNo, O);
+}
+
+void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
+                                         raw_ostream &O) {
+  O << "src1_sel:";
+  printSDWASel(MI, OpNo, O);
+}
+
+void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O) {
+  O << "dst_unused:";
+  unsigned Imm = MI->getOperand(OpNo).getImm();
+  switch (Imm) {
+  case 0: O << "UNUSED_PAD"; break;
+  case 1: O << "UNUSED_SEXT"; break;
+  case 2: O << "UNUSED_PRESERVE"; break;
+  default: llvm_unreachable("Invalid SDWA dest_unused operand");
+  }
+}
+
 void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
                                         raw_ostream &O) {
   unsigned Imm = MI->getOperand(OpNum).getImm();

Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h?rev=267553&r1=267552&r2=267553&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h Tue Apr 26 08:33:56 2016
@@ -67,6 +67,11 @@ private:
   void printRowMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printBankMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printBoundCtrlOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSDWADstSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSDWASrc1Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSDWADstUnused(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,

Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=267553&r1=267552&r2=267553&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Tue Apr 26 08:33:56 2016
@@ -29,17 +29,18 @@ enum {
   VOP2 = 1 << 11,
   VOP3 = 1 << 12,
   VOPC = 1 << 13,
-  DPP = 1 << 14,
+  SDWA = 1 << 14,
+  DPP = 1 << 15,
 
-  MUBUF = 1 << 15,
-  MTBUF = 1 << 16,
-  SMRD = 1 << 17,
-  DS = 1 << 18,
-  MIMG = 1 << 19,
-  FLAT = 1 << 20,
-  WQM = 1 << 21,
-  VGPRSpill = 1 << 22,
-  VOPAsmPrefer32Bit = 1 << 23
+  MUBUF = 1 << 16,
+  MTBUF = 1 << 17,
+  SMRD = 1 << 18,
+  DS = 1 << 19,
+  MIMG = 1 << 20,
+  FLAT = 1 << 21,
+  WQM = 1 << 22,
+  VGPRSpill = 1 << 23,
+  VOPAsmPrefer32Bit = 1 << 24
 };
 }
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=267553&r1=267552&r2=267553&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Tue Apr 26 08:33:56 2016
@@ -31,6 +31,7 @@ class InstSI <dag outs, dag ins, string
   field bits<1> VOP2 = 0;
   field bits<1> VOP3 = 0;
   field bits<1> VOPC = 0;
+  field bits<1> SDWA = 0;
   field bits<1> DPP = 0;
 
   field bits<1> MUBUF = 0;
@@ -64,17 +65,18 @@ class InstSI <dag outs, dag ins, string
   let TSFlags{11} = VOP2;
   let TSFlags{12} = VOP3;
   let TSFlags{13} = VOPC;
-  let TSFlags{14} = DPP;
+  let TSFlags{14} = SDWA;
+  let TSFlags{15} = DPP;
 
-  let TSFlags{15} = MUBUF;
-  let TSFlags{16} = MTBUF;
-  let TSFlags{17} = SMRD;
-  let TSFlags{18} = DS;
-  let TSFlags{19} = MIMG;
-  let TSFlags{20} = FLAT;
-  let TSFlags{21} = WQM;
-  let TSFlags{22} = VGPRSpill;
-  let TSFlags{23} = VOPAsmPrefer32Bit;
+  let TSFlags{16} = MUBUF;
+  let TSFlags{17} = MTBUF;
+  let TSFlags{18} = SMRD;
+  let TSFlags{19} = DS;
+  let TSFlags{20} = MIMG;
+  let TSFlags{21} = FLAT;
+  let TSFlags{22} = WQM;
+  let TSFlags{23} = VGPRSpill;
+  let TSFlags{24} = VOPAsmPrefer32Bit;
 
   let SchedRW = [Write32Bit];
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=267553&r1=267552&r2=267553&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Tue Apr 26 08:33:56 2016
@@ -576,6 +576,22 @@ class DPPOptionalMatchClass <string OpNa
   let IsOptional = 1;
 }
 
+def SDWASelMatchClass : AsmOperandClass {
+  let Name = "SDWASel";
+  let PredicateMethod = "isSDWASel";
+  let ParserMethod = "parseSDWASel";
+  let RenderMethod = "addImmOperands";
+  let IsOptional = 1;
+}
+
+def SDWADstUnusedMatchClass : AsmOperandClass {
+  let Name = "SDWADstUnused";
+  let PredicateMethod = "isSDWADstUnused";
+  let ParserMethod = "parseSDWADstUnused";
+  let RenderMethod = "addImmOperands";
+  let IsOptional = 1;
+}
+
 class OptionalImmAsmOperand <string OpName> : AsmOperandClass {
   let Name = "Imm"#OpName;
   let PredicateMethod = "isImm";
@@ -737,11 +753,31 @@ def bound_ctrl : Operand <i1> {
   let ParserMatchClass = DPPOptionalMatchClass<"BoundCtrl">;
 }
 
+def dst_sel : Operand <i32> {
+  let PrintMethod = "printSDWADstSel";
+  let ParserMatchClass = SDWASelMatchClass;
+}
+
+def src0_sel : Operand <i32> {
+  let PrintMethod = "printSDWASrc0Sel";
+  let ParserMatchClass = SDWASelMatchClass;
+}
+
+def src1_sel : Operand <i32> {
+  let PrintMethod = "printSDWASrc1Sel";
+  let ParserMatchClass = SDWASelMatchClass;
+}
+
 def hwreg : Operand <i16> {
   let PrintMethod = "printHwreg";
   let ParserMatchClass = HwregMatchClass;
 }
 
+def dst_unused : Operand <i32> {
+  let PrintMethod = "printSDWADstUnused";
+  let ParserMatchClass = SDWADstUnusedMatchClass;
+}
+
 } // End OperandType = "OPERAND_IMMEDIATE"
 
 
@@ -1316,16 +1352,11 @@ class getVOPSrc0ForVT<ValueType VT> {
   RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32);
 }
 
-// Returns the register class to use for source 1 of VOP[12C] for the
-// given VT.
-class getVOPSrc1ForVT<ValueType VT> {
+// Returns the vreg register class to use for source operand given VT
+class getVregSrcForVT<ValueType VT> {
   RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
 }
 
-// Returns the register class to use for DPP source operands.
-class getDPPSrcForVT<ValueType VT> {
-  RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
-}
 
 // Returns the register class to use for sources of VOP3 instructions for the
 // given VT.
@@ -1431,7 +1462,40 @@ class getInsDPP <RegisterClass Src0RC, R
              /* endif */)));
 }
 
-class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
+class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
+                                                             bit HasModifiers> {
+
+  dag ret = !if (!eq(NumSrcArgs, 0),
+                // VOP1 without input operands (V_NOP)
+                (ins),
+            !if (!eq(NumSrcArgs, 1),
+              !if (!eq(HasModifiers, 1),
+                // VOP1_SDWA with modifiers
+                (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+                     ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+                     src0_sel:$src0_sel)
+              /* else */,
+                // VOP1_SDWA without modifiers
+                (ins Src0RC:$src0, dst_sel:$dst_sel, dst_unused:$dst_unused,
+                     src0_sel:$src0_sel)
+              /* endif */)
+              /* NumSrcArgs == 2 */,
+              !if (!eq(HasModifiers, 1),
+                // VOP2_SDWA with modifiers
+                (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+                     InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
+                     ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+                     src0_sel:$src0_sel, src1_sel:$src1_sel)
+              /* else */,
+                // VOP2_SDWA without modifiers
+                (ins Src0RC:$src0, Src1RC:$src1,
+                     dst_sel:$dst_sel, dst_unused:$dst_unused,
+                     src0_sel:$src0_sel, src1_sel:$src1_sel)
+             /* endif */)));
+}
+
+// Outs for DPP and SDWA
+class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
   dag ret = !if(HasDst,
                 !if(!eq(DstVT.Size, 1),
                     (outs DstRCDPP:$sdst), // sdst for VOPC
@@ -1484,20 +1548,41 @@ class getAsmDPP <bit HasDst, int NumSrcA
   string ret = dst#args#" $dpp_ctrl $row_mask $bank_mask $bound_ctrl";
 }
 
-class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
+class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
+  string dst = !if(HasDst,
+                   !if(!eq(DstVT.Size, 1),
+                       "$sdst",
+                       "$vdst"),
+                    ""); // use $sdst for VOPC
+  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+  string src1 = !if(!eq(NumSrcArgs, 1), "",
+                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+                                           " $src1_modifiers,"));
+  string args = !if(!eq(HasModifiers, 0),
+                     getAsm32<0, NumSrcArgs, DstVT>.ret,
+                     ", "#src0#src1#", $clamp");
+  string sdwa = !if(!eq(NumSrcArgs, 0),
+                    "",
+                    !if(!eq(NumSrcArgs, 1),
+                        " $dst_sel $dst_unused $src0_sel",
+                        " $dst_sel $dst_unused $src0_sel $src1_sel"
+                    )
+                );
+  string ret = dst#args#sdwa;
+}
+
+// Function that checks if instruction supports DPP and SDWA
+class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                  ValueType Src1VT = i32> {
   bit ret = !if(!eq(NumSrcArgs, 3),
-                0, // NumSrcArgs == 3 - No DPP for VOP3
-                !if(!eq(DstVT.Size, 1),
-                    0, // No DPP for VOPC
-                    !if(!eq(DstVT.Size, 64),
-                        0, // 64-bit dst - No DPP for 64-bit operands
+                0, // NumSrcArgs == 3 - No DPP or SDWA for VOP3
+                !if(!eq(DstVT.Size, 64),
+                    0, // 64-bit dst - No DPP or SDWA for 64-bit operands
+                    !if(!eq(Src0VT.Size, 64),
+                        0, // 64-bit src0
                         !if(!eq(Src0VT.Size, 64),
-                            0, // 64-bit src0
-                            !if(!eq(Src0VT.Size, 64),
-                                0, // 64-bit src2
-                                1
-                            )
+                            0, // 64-bit src2
+                            1
                         )
                     )
                 )
@@ -1514,41 +1599,47 @@ class VOPProfile <list<ValueType> _ArgVT
   field ValueType Src2VT = ArgVT[3];
   field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
   field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
+  field RegisterOperand DstRCSDWA = getVALUDstForVT<DstVT>.ret;
   field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
-  field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
+  field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
   field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
   field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
   field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
-  field RegisterClass Src0DPP = getDPPSrcForVT<Src0VT>.ret;
-  field RegisterClass Src1DPP = getDPPSrcForVT<Src1VT>.ret;
-
+  field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
+  field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
+  field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret;
+  field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret;
+  
   field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
   field bit HasDst32 = HasDst;
   field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
   field bit HasModifiers = hasModifiers<Src0VT>.ret;
 
-  field bit HasDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
-
+  field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+  
   field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
 
   // VOP3b instructions are a special case with a second explicit
   // output. This is manually overridden for them.
   field dag Outs32 = Outs;
   field dag Outs64 = Outs;
-  field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
+  field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
+  field dag OutsSDWA = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
 
   field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
   field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                              HasModifiers>.ret;
   field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs, HasModifiers>.ret;
+  field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, HasModifiers>.ret;
 
   field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
   field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
   field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+  field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
 }
 
-class VOP_NO_DPP <VOPProfile p> : VOPProfile <p.ArgVT> {
-  let HasDPP = 0;
+class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
+  let HasExt = 0;
 }
 
 // FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
@@ -1659,12 +1750,12 @@ def VOP_F32_F32_F32_F32 : VOPProfile <[f
 def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
   field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32imm:$imm);
   field string Asm32 = "$vdst, $src0, $src1, $imm";
-  field bit HasDPP = 0;
+  field bit HasExt = 0;
 }
 def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
   field dag Ins32 = (ins VCSrc_32:$src0, u32imm:$imm, VGPR_32:$src1);
   field string Asm32 = "$vdst, $src0, $imm, $src1";
-  field bit HasDPP = 0;
+  field bit HasExt = 0;
 }
 def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
   let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
@@ -1675,9 +1766,15 @@ def VOP_MAC : VOPProfile <[f32, f32, f32
                     VGPR_32:$src2, // stub argument
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+  let InsSDWA = (ins InputModsNoDefault:$src0_modifiers, Src0RC32:$src0,
+                     InputModsNoDefault:$src1_modifiers, Src1RC32:$src1,
+                     VGPR_32:$src2, // stub argument
+                     ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+                     src0_sel:$src0_sel, src1_sel:$src1_sel);
   let Asm32 = getAsm32<1, 2, f32>.ret;
   let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
   let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret;
+  let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
 }
 def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
 def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
@@ -1787,13 +1884,37 @@ multiclass VOP1_m <vop1 op, string opNam
 class VOP1_DPP <vop1 op, string opName, VOPProfile p> :
   VOP1_DPPe <op.VI>,
   VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
-  let AssemblerPredicates = !if(p.HasDPP, [isVI], [DisableInst]);
+  let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
   let DecoderNamespace = "DPP";
   let DisableDecoder = DisableVIDecoder;
   let src0_modifiers = !if(p.HasModifiers, ?, 0);
   let src1_modifiers = 0;
 }
 
+class SDWADisableFields <VOPProfile p> {
+  bits<8> src0 = !if(!eq(p.NumSrcArgs, 0), 0, ?);
+  bits<3> src0_sel = !if(!eq(p.NumSrcArgs, 0), 6, ?);
+  bits<3> src0_modifiers = !if(p.HasModifiers, ?, 0);
+  bits<3> src1_sel = !if(!eq(p.NumSrcArgs, 0), 6, 
+                         !if(!eq(p.NumSrcArgs, 1), 6,
+                             ?));
+  bits<3> src1_modifiers = !if(!eq(p.NumSrcArgs, 0), 0, 
+                               !if(!eq(p.NumSrcArgs, 1), 0,
+                                   !if(p.HasModifiers, ?, 0)));
+  bits<3> dst_sel = !if(p.HasDst, ?, 6);
+  bits<2> dst_unused = !if(p.HasDst, ?, 0);
+  bits<1> clamp = !if(p.HasModifiers, ?, 0);
+}
+
+class VOP1_SDWA <vop1 op, string opName, VOPProfile p> :
+  VOP1_SDWAe <op.VI>,
+  VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
+  SDWADisableFields <p> {
+  let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
+  let DecoderNamespace = "SDWA";
+  let DisableDecoder = DisableVIDecoder;
+}
+
 multiclass VOP1SI_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
                      string asm = opName#p.Asm32> {
 
@@ -1851,13 +1972,22 @@ multiclass VOP2_m <vop2 op, string opNam
 class VOP2_DPP <vop2 op, string opName, VOPProfile p> :
   VOP2_DPPe <op.VI>,
   VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
-  let AssemblerPredicates = !if(p.HasDPP, [isVI], [DisableInst]);
+  let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
   let DecoderNamespace = "DPP";
   let DisableDecoder = DisableVIDecoder;
   let src0_modifiers = !if(p.HasModifiers, ?, 0);
   let src1_modifiers = !if(p.HasModifiers, ?, 0);
 }
 
+class VOP2_SDWA <vop2 op, string opName, VOPProfile p> :
+  VOP2_SDWAe <op.VI>,
+  VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
+  SDWADisableFields <p> {
+  let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
+  let DecoderNamespace = "SDWA";
+  let DisableDecoder = DisableVIDecoder;
+}
+
 class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
 
   bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
@@ -2089,6 +2219,8 @@ multiclass VOP1_Helper <vop1 op, string
                         p.HasModifiers>;
 
   def _dpp : VOP1_DPP <op, opName, p>;
+
+  def _sdwa : VOP1_SDWA <op, opName, p>;
 }
 
 multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
@@ -2122,6 +2254,8 @@ multiclass VOP2_Helper <vop2 op, string
                         revOp, p.HasModifiers>;
 
   def _dpp : VOP2_DPP <op, opName, p>;
+
+  def _sdwa : VOP2_SDWA <op, opName, p>;
 }
 
 multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=267553&r1=267552&r2=267553&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Tue Apr 26 08:33:56 2016
@@ -1394,13 +1394,13 @@ defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0
   VOP_F32_F32, int_amdgcn_frexp_mant
 >;
 let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
-defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NO_DPP<VOP_NONE>>;
+defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
 }
 
 let Uses = [M0, EXEC] in {
-defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_NO_DPP<VOP_I32_I32>>;
-defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_NO_DPP<VOP_I32_I32>>;
-defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_NO_DPP<VOP_I32_I32>>;
+defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_NO_EXT<VOP_I32_I32>>;
+defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_NO_EXT<VOP_I32_I32>>;
+defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
 } // End Uses = [M0, EXEC]
 
 // These instruction only exist on SI and CI

Modified: llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td?rev=267553&r1=267552&r2=267553&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td Tue Apr 26 08:33:56 2016
@@ -225,6 +225,61 @@ class VOP2_DPPe <bits<6> op> : VOP_DPPe
   let Inst{31} = 0x0; //encoding
 }
 
+class VOP_SDWA <dag outs, dag ins, string asm, list<dag> pattern, bit HasMods = 0> :
+    VOPAnyCommon <outs, ins, asm, pattern> {
+  let SDWA = 1;
+  let Size = 8;
+}
+
+class VOP_SDWAe : Enc64 {
+  bits<8> src0;
+  bits<3> src0_sel;
+  bits<3> src0_modifiers; // {abs,neg,sext}
+  bits<3> src1_sel;
+  bits<3> src1_modifiers;
+  bits<3> dst_sel;
+  bits<2> dst_unused;
+  bits<1> clamp;
+
+  let Inst{39-32} = src0;
+  let Inst{42-40} = dst_sel;
+  let Inst{44-43} = dst_unused;
+  let Inst{45} = clamp;
+  let Inst{50-48} = src0_sel;
+  let Inst{53-51} = src0_modifiers;
+  let Inst{58-56} = src1_sel;
+  let Inst{61-59} = src1_modifiers;
+}
+
+class VOP1_SDWAe <bits<8> op> : VOP_SDWAe {
+  bits<8> vdst;
+
+  let Inst{8-0} = 0xf9; // sdwa
+  let Inst{16-9} = op;
+  let Inst{24-17} = vdst;
+  let Inst{31-25} = 0x3f; // encoding
+}
+
+class VOP2_SDWAe <bits<6> op> : VOP_SDWAe {
+  bits<8> vdst;
+  bits<8> src1;
+
+  let Inst{8-0} = 0xf9; // sdwa
+  let Inst{16-9} = src1;
+  let Inst{24-17} = vdst;
+  let Inst{30-25} = op;
+  let Inst{31} = 0x0; // encoding
+}
+
+class VOPC_SDWAe <bits<8> op> : VOP_SDWAe {
+  bits<8> src1;
+
+  let Inst{8-0} = 0xf9; // sdwa
+  let Inst{16-9} = src1;
+  let Inst{24-17} = op;
+  let Inst{31-25} = 0x3e; // encoding
+}
+
 class EXPe_vi : EXPe {
   let Inst{31-26} = 0x31; //encoding
 }

Added: llvm/trunk/test/MC/AMDGPU/vop_sdwa.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop_sdwa.s?rev=267553&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop_sdwa.s (added)
+++ llvm/trunk/test/MC/AMDGPU/vop_sdwa.s Tue Apr 26 08:33:56 2016
@@ -0,0 +1,40 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI
+// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI
+
+// ToDo: converters
+// ToDo: VOPC
+// ToDo: VOP2b (see vop_dpp.s)
+// ToDo: V_MAC_F32 (see vop_dpp.s)
+// ToDo: sext()
+// ToDo: intrinsics
+
+
+// NOSICI: error:
+// VI: v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x02,0x10,0x06,0x06]
+v_mov_b32 v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD
+
+// NOSICI: error:
+// VI: v_mov_b32_sdwa v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x06,0x7e,0x04,0x11,0x05,0x06]
+v_mov_b32 v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1
+
+// NOSICI: error:
+// VI: v_mov_b32_sdwa v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0 ; encoding: [0xf9,0x02,0x1e,0x7e,0x63,0x0a,0x04,0x06]
+v_mov_b32 v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0
+
+// NOSICI: error:
+// VI: v_min_u32_sdwa v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 ; encoding: [0xf9,0x02,0x84,0x1d,0x0d,0x0b,0x03,0x02]
+v_min_u32 v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2
+
+// NOSICI: error:
+// VI: v_min_u32_sdwa v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1 ; encoding: [0xf9,0x02,0xfe,0x1d,0x04,0x04,0x02,0x05]
+v_min_u32 v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1
+
+// NOSICI: error:
+// VI: v_min_u32_sdwa v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x90,0x1d,0xc8,0x05,0x01,0x06]
+v_min_u32 v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+
+// NOSICI: error:
+// VI: v_min_u32_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x1c,0x01,0x06,0x00,0x06]
+v_min_u32 v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD




More information about the llvm-commits mailing list