[llvm] r308740 - [AMDGPU][MC][GFX9] Added support of VOP3 'op_sel' modifier

Dmitry Preobrazhensky via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 21 06:54:11 PDT 2017


Author: dpreobra
Date: Fri Jul 21 06:54:11 2017
New Revision: 308740

URL: http://llvm.org/viewvc/llvm-project?rev=308740&view=rev
Log:
[AMDGPU][MC][GFX9] Added support of VOP3 'op_sel' modifier

See bug 33591: https://bugs.llvm.org/show_bug.cgi?id=33591

Reviewers: vpykhtin, artem.tamazov, SamWot, arsenm

Differential Revision: https://reviews.llvm.org/D35424

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
    llvm/trunk/lib/Target/AMDGPU/SIDefines.h
    llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
    llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
    llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td
    llvm/trunk/test/MC/AMDGPU/gfx9_asm_all.s
    llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s
    llvm/trunk/test/MC/AMDGPU/vop3p-err.s
    llvm/trunk/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=308740&r1=308739&r2=308740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Fri Jul 21 06:54:11 2017
@@ -174,6 +174,14 @@ private:
   bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp) const;
 
+  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                        SDValue &Clamp) const;
+
+  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                            SDValue &Clamp) const;
+
   void SelectADD_SUB_I64(SDNode *N);
   void SelectUADDO_USUBO(SDNode *N);
   void SelectDIV_SCALE(SDNode *N);
@@ -1864,6 +1872,42 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods
   return SelectVOP3PMods(In, Src, SrcMods);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
+                                         SDValue &SrcMods) const {
+  Src = In;
+  // FIXME: Handle op_sel
+  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
+  return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
+                                          SDValue &SrcMods,
+                                          SDValue &Clamp) const {
+  SDLoc SL(In);
+
+  // FIXME: Handle clamp
+  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
+
+  return SelectVOP3OpSel(In, Src, SrcMods);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
+                                             SDValue &SrcMods) const {
+  // FIXME: Handle op_sel
+  return SelectVOP3Mods(In, Src, SrcMods);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
+                                              SDValue &SrcMods,
+                                              SDValue &Clamp) const {
+  SDLoc SL(In);
+
+  // FIXME: Handle clamp
+  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
+
+  return SelectVOP3OpSelMods(In, Src, SrcMods);
+}
+
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
   const AMDGPUTargetLowering& Lowering =
     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());

Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=308740&r1=308739&r2=308740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Fri Jul 21 06:54:11 2017
@@ -1060,6 +1060,7 @@ public:
 
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);
+  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
 
@@ -2688,7 +2689,7 @@ OperandMatchResultTy AMDGPUAsmParser::pa
 
   // FIXME: How to verify the number of elements matches the number of src
   // operands?
-  for (int I = 0; I < 3; ++I) {
+  for (int I = 0; I < 4; ++I) {
     if (I != 0) {
       if (getLexer().is(AsmToken::RBrac))
         break;
@@ -4088,6 +4089,30 @@ OperandMatchResultTy AMDGPUAsmParser::pa
   return MatchOperand_NoMatch;
 }
 
+void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
+  cvtVOP3P(Inst, Operands);
+
+  int Opc = Inst.getOpcode();
+
+  int SrcNum;
+  const int Ops[] = { AMDGPU::OpName::src0,
+                      AMDGPU::OpName::src1,
+                      AMDGPU::OpName::src2 };
+  for (SrcNum = 0;
+       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
+       ++SrcNum);
+  assert(SrcNum > 0);
+
+  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+
+  if ((OpSel & (1 << SrcNum)) != 0) {
+    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
+    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
+  }
+}
+
 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
       // 1. This operand is input modifiers
   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
@@ -4172,7 +4197,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &I
   int Opc = Inst.getOpcode();
 
   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
-  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
+
+  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
+  if (OpSelHiIdx != -1) {
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
+  }
 
   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
   if (NegLoIdx != -1) {
@@ -4188,13 +4217,16 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &I
                          AMDGPU::OpName::src2_modifiers };
 
   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
-  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
 
   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
-  unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
+  unsigned OpSelHi = 0;
   unsigned NegLo = 0;
   unsigned NegHi = 0;
 
+  if (OpSelHiIdx != -1) {
+    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
+  }
+
   if (NegLoIdx != -1) {
     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
     NegLo = Inst.getOperand(NegLoIdx).getImm();

Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=308740&r1=308739&r2=308740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Fri Jul 21 06:54:11 2017
@@ -803,7 +803,8 @@ void AMDGPUInstPrinter::printExpTgt(cons
   }
 }
 
-static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod) {
+static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod,
+                               bool HasDstSel) {
   int DefaultValue = (Mod == SISrcMods::OP_SEL_1);
 
   for (int I = 0; I < NumOps; ++I) {
@@ -811,11 +812,16 @@ static bool allOpsDefaultValue(const int
       return false;
   }
 
+  if (HasDstSel && (Ops[0] & SISrcMods::DST_OP_SEL) != 0)
+    return false;
+
   return true;
 }
 
-static void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
-                                raw_ostream &O) {
+void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
+                                            StringRef Name,
+                                            unsigned Mod,
+                                            raw_ostream &O) {
   unsigned Opc = MI->getOpcode();
   int NumOps = 0;
   int Ops[3];
@@ -830,7 +836,12 @@ static void printPackedModifier(const MC
     Ops[NumOps++] = MI->getOperand(Idx).getImm();
   }
 
-  if (allOpsDefaultValue(Ops, NumOps, Mod))
+  const bool HasDstSel =
+    NumOps > 0 &&
+    Mod == SISrcMods::OP_SEL_0 &&
+    MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3_OPSEL;
+
+  if (allOpsDefaultValue(Ops, NumOps, Mod, HasDstSel))
     return;
 
   O << Name;
@@ -841,6 +852,10 @@ static void printPackedModifier(const MC
     O << !!(Ops[I] & Mod);
   }
 
+  if (HasDstSel) {
+    O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
+  }
+
   O << ']';
 }
 

Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h?rev=308740&r1=308739&r2=308740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h Fri Jul 21 06:54:11 2017
@@ -127,6 +127,8 @@ private:
                         const MCSubtargetInfo &STI, raw_ostream &O);
   void printSDWADstUnused(const MCInst *MI, unsigned OpNo,
                           const MCSubtargetInfo &STI, raw_ostream &O);
+  void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
+                           raw_ostream &O);
   void printOpSel(const MCInst *MI, unsigned OpNo,
                   const MCSubtargetInfo &STI, raw_ostream &O);
   void printOpSelHi(const MCInst *MI, unsigned OpNo,

Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=308740&r1=308739&r2=308740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Fri Jul 21 06:54:11 2017
@@ -67,7 +67,8 @@ enum : uint64_t {
   SCALAR_STORE = UINT64_C(1) << 39,
   FIXED_SIZE = UINT64_C(1) << 40,
   VOPAsmPrefer32Bit = UINT64_C(1) << 41,
-  HasFPClamp = UINT64_C(1) << 42
+  HasFPClamp = UINT64_C(1) << 42,
+  VOP3_OPSEL = UINT64_C(1) << 43
 };
 
 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
@@ -137,7 +138,8 @@ namespace SISrcMods {
    SEXT = 1 << 0,  // Integer sign-extend modifier
    NEG_HI = ABS,   // Floating-point negate high packed component modifier.
    OP_SEL_0 = 1 << 2,
-   OP_SEL_1 = 1 << 3
+   OP_SEL_1 = 1 << 3,
+   DST_OP_SEL = 1 << 3 // VOP3 dst op_sel (share mask with OP_SEL_1)
   };
 }
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=308740&r1=308739&r2=308740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Fri Jul 21 06:54:11 2017
@@ -83,6 +83,10 @@ class InstSI <dag outs, dag ins, string
   // the clamp modifier has floating point semantics.
   field bit FPClamp = 0;
 
+  // This bit indicates that this is a VOP3 opcode which supports op_sel
+  // modifier (gfx9 only).
+  field bit VOP3_OPSEL = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = SALU;
   let TSFlags{1} = VALU;
@@ -127,6 +131,7 @@ class InstSI <dag outs, dag ins, string
   let TSFlags{40} = FixedSize;
   let TSFlags{41} = VOPAsmPrefer32Bit;
   let TSFlags{42} = FPClamp;
+  let TSFlags{43} = VOP3_OPSEL;
 
   let SchedRW = [Write32Bit];
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=308740&r1=308739&r2=308740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Fri Jul 21 06:54:11 2017
@@ -659,6 +659,15 @@ class IntInputMods <IntInputModsMatchCla
 def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
 def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
 
+class OpSelModsMatchClass : AsmOperandClass {
+  let Name = "OpSelMods";
+  let ParserMethod = "parseRegOrImm";
+  let PredicateMethod = "isRegOrImm";
+}
+
+def IntOpSelModsMatchClass : OpSelModsMatchClass;
+def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;
+
 def FPRegSDWAInputModsMatchClass : AsmOperandClass {
   let Name = "SDWARegWithFPInputMods";
   let ParserMethod = "parseRegWithFPInputMods";
@@ -750,6 +759,11 @@ def VOP3OMods : ComplexPattern<untyped,
 def VOP3PMods  : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
 def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
 
+def VOP3OpSel  : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
+def VOP3OpSel0 : ComplexPattern<untyped, 3, "SelectVOP3OpSel0">;
+
+def VOP3OpSelMods  : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
+def VOP3OpSelMods0 : ComplexPattern<untyped, 3, "SelectVOP3OpSelMods0">;
 
 //===----------------------------------------------------------------------===//
 // SI assembler operands
@@ -771,6 +785,7 @@ def SRCMODS {
   int NEG_HI = ABS;
   int OP_SEL_0 = 4;
   int OP_SEL_1 = 8;
+  int DST_OP_SEL = 8;
 }
 
 def DSTCLAMP {
@@ -1020,6 +1035,10 @@ class getSrcMod <ValueType VT> {
                      );
 }
 
+class getOpSelMod <ValueType VT> {
+  Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
+}
+
 // Return type of input modifiers operand specified input operand for DPP
 class getSrcModExt <ValueType VT> {
     bit isFP = !if(!eq(VT.Value, f16.Value), 1,
@@ -1133,6 +1152,37 @@ class getInsVOP3P <RegisterOperand Src0R
   );
 }
 
+class getInsVOP3OpSel <RegisterOperand Src0RC,
+                       RegisterOperand Src1RC,
+                       RegisterOperand Src2RC,
+                       int NumSrcArgs,
+                       bit HasClamp,
+                       Operand Src0Mod,
+                       Operand Src1Mod,
+                       Operand Src2Mod> {
+  dag ret = !if (!eq(NumSrcArgs, 2),
+    !if (HasClamp,
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           clampmod:$clamp,
+           op_sel:$op_sel),
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           op_sel:$op_sel)),
+    // else NumSrcArgs == 3
+    !if (HasClamp,
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           Src2Mod:$src2_modifiers, Src2RC:$src2,
+           clampmod:$clamp,
+           op_sel:$op_sel),
+      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+           Src1Mod:$src1_modifiers, Src1RC:$src1,
+           Src2Mod:$src2_modifiers, Src2RC:$src2,
+           op_sel:$op_sel))
+  );
+}
+
 class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
                  bit HasModifiers, Operand Src0Mod, Operand Src1Mod> {
 
@@ -1279,6 +1329,34 @@ class getAsmVOP3P <bit HasDst, int NumSr
   string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
 }
 
+class getAsmVOP3OpSel <int NumSrcArgs,
+                       bit HasClamp,
+                       bit Src0HasMods,
+                       bit Src1HasMods,
+                       bit Src2HasMods> {
+  string dst = " $vdst";
+
+  string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+  string isrc1 = !if(!eq(NumSrcArgs, 1), "",
+                     !if(!eq(NumSrcArgs, 2), " $src1",
+                                             " $src1,"));
+  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
+
+  string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+  string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
+                     !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+                                             " $src1_modifiers,"));
+  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
+
+  string src0 = !if(Src0HasMods, fsrc0, isrc0);
+  string src1 = !if(Src1HasMods, fsrc1, isrc1);
+  string src2 = !if(Src2HasMods, fsrc2, isrc2);
+
+  string clamp = !if(HasClamp, "$clamp", "");
+
+  string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp;
+}
+
 class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
   string dst = !if(HasDst,
                    !if(!eq(DstVT.Size, 1),
@@ -1462,7 +1540,12 @@ class VOPProfile <list<ValueType> _ArgVT
   field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
                                    NumSrcArgs, HasClamp,
                                    Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
-
+  field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
+                                           NumSrcArgs,
+                                           HasClamp,
+                                           getOpSelMod<Src0VT>.ret,
+                                           getOpSelMod<Src1VT>.ret,
+                                           getOpSelMod<Src2VT>.ret>.ret;
   field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
                                HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
   field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
@@ -1473,6 +1556,11 @@ class VOPProfile <list<ValueType> _ArgVT
   field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
   field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret;
   field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
+  field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
+                                              HasClamp,
+                                              HasSrc0FloatMods,
+                                              HasSrc1FloatMods,
+                                              HasSrc2FloatMods>.ret;
   field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
   field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
   field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
@@ -1495,6 +1583,8 @@ def VOP_I16_I16_I16 : VOPProfile <[i16,
 def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
 def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
 
+def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
+
 def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
 def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
 def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=308740&r1=308739&r2=308740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Fri Jul 21 06:54:11 2017
@@ -1288,12 +1288,32 @@ class FPMed3Pat<ValueType vt,
   (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;
 
+class FP16Med3Pat<ValueType vt,
+                Instruction med3Inst> : Pat<
+  (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+                           (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+           (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+                                           (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+                           (vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
+  (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
+>;
+
+class Int16Med3Pat<Instruction med3Inst,
+                   SDPatternOperator max,
+                   SDPatternOperator max_oneuse,
+                   SDPatternOperator min_oneuse,
+                   ValueType vt = i32> : Pat<
+  (max (min_oneuse vt:$src0, vt:$src1),
+       (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
+  (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
+>;
+
 def : FPMed3Pat<f32, V_MED3_F32>;
 
 let Predicates = [isGFX9] in {
-def : FPMed3Pat<f16, V_MED3_F16>;
-def : IntMed3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
-def : IntMed3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
+def : FP16Med3Pat<f16, V_MED3_F16>;
+def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
+def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
 } // End Predicates = [isGFX9]
 
 //============================================================================//

Modified: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td?rev=308740&r1=308739&r2=308740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td Fri Jul 21 06:54:11 2017
@@ -53,6 +53,46 @@ class getVOP3PModPat<VOPProfile P, SDPat
                   ret1));
 }
 
+class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
+  list<dag> ret3 = [(set P.DstVT:$vdst,
+    (node (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+                                    (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
+          (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
+          (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+  list<dag> ret2 = [(set P.DstVT:$vdst,
+    (node !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+                          (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
+          (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+  list<dag> ret1 = [(set P.DstVT:$vdst,
+    (node (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+
+  list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+                  !if(!eq(P.NumSrcArgs, 2), ret2,
+                  ret1));
+}
+
+class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
+  list<dag> ret3 = [(set P.DstVT:$vdst,
+    (node (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+                                    (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
+          (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
+          (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+  list<dag> ret2 = [(set P.DstVT:$vdst,
+    (node !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+                          (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
+          (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+  list<dag> ret1 = [(set P.DstVT:$vdst,
+    (node (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+
+  list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+                  !if(!eq(P.NumSrcArgs, 2), ret2,
+                  ret1));
+}
+
 class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
   list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
   list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
@@ -67,6 +107,16 @@ class VOP3Inst<string OpName, VOPProfile
     !if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret),
     VOP3Only>;
 
+class VOP3OpSelInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
+  VOP3_Pseudo<OpName, P,
+    !if(isFloatType<P.Src0VT>.ret,
+        getVOP3OpSelModPat<P, node>.ret,
+        getVOP3OpSelPat<P, node>.ret),
+    1, 0, 1> {
+
+  let AsmMatchConverter = "cvtVOP3OpSel";
+}
+
 // Special case for v_div_fmas_{f32|f64}, since it seems to be the
 // only VOP instruction that implicitly reads VCC.
 let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
@@ -93,6 +143,11 @@ class VOP3_Profile<VOPProfile P> : VOPPr
   let Asm64 = " " # P.Asm64;
 }
 
+class VOP3OpSel_Profile<VOPProfile P> : VOP3_Profile<P> {
+  let HasClamp = 1;
+  let HasOpSel = 1;
+}
+
 class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
   // v_div_scale_{f32|f64} do not support input modifiers.
   let HasModifiers = 0;
@@ -303,7 +358,7 @@ defm: Ternary_i16_Pats<mul, add, V_MAD_I
 } // End Predicates = [Has16BitInsts]
 
 let SubtargetPredicate = isGFX9 in {
-def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16>>;
+def V_PACK_B32_F16 : VOP3OpSelInst <"v_pack_b32_f16", VOP3OpSel_Profile<VOP_B32_F16_F16>>;
 def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 def V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -313,17 +368,26 @@ def V_OR3_B32 : VOP3Inst <"v_or3_b32", V
 
 def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 
-def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
-def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
-def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
-
-def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmin3>;
-def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmin3>;
-def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumin3>;
-
-def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmax3>;
-def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmax3>;
-def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumax3>;
+def V_MED3_F16 : VOP3OpSelInst <"v_med3_f16", VOP3OpSel_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
+def V_MED3_I16 : VOP3OpSelInst <"v_med3_i16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
+def V_MED3_U16 : VOP3OpSelInst <"v_med3_u16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
+
+def V_MIN3_F16 : VOP3OpSelInst <"v_min3_f16", VOP3OpSel_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmin3>;
+def V_MIN3_I16 : VOP3OpSelInst <"v_min3_i16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmin3>;
+def V_MIN3_U16 : VOP3OpSelInst <"v_min3_u16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUumin3>;
+
+def V_MAX3_F16 : VOP3OpSelInst <"v_max3_f16", VOP3OpSel_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmax3>;
+def V_MAX3_I16 : VOP3OpSelInst <"v_max3_i16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmax3>;
+def V_MAX3_U16 : VOP3OpSelInst <"v_max3_u16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUumax3>;
+
+def V_ADD_I16 : VOP3OpSelInst <"v_add_i16", VOP3OpSel_Profile<VOP_I16_I16_I16>>;
+def V_SUB_I16 : VOP3OpSelInst <"v_sub_i16", VOP3OpSel_Profile<VOP_I16_I16_I16>>;
+
+def V_MAD_U32_U16 : VOP3OpSelInst <"v_mad_u32_u16", VOP3OpSel_Profile<VOP_I32_I16_I16_I32>>;
+def V_MAD_I32_I16 : VOP3OpSelInst <"v_mad_i32_i16", VOP3OpSel_Profile<VOP_I32_I16_I16_I32>>;
+
+def V_CVT_PKNORM_I16_F16 : VOP3OpSelInst <"v_cvt_pknorm_i16_f16", VOP3OpSel_Profile<VOP_B32_F16_F16>>;
+def V_CVT_PKNORM_U16_F16 : VOP3OpSelInst <"v_cvt_pknorm_u16_f16", VOP3OpSel_Profile<VOP_B32_F16_F16>>;
 } // End SubtargetPredicate = isGFX9
 
 
@@ -443,6 +507,11 @@ multiclass VOP3be_Real_vi<bits<10> op> {
             VOP3be_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
 }
 
+multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
+  def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
+            VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+}
+
 } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
 
 defm V_MAD_U64_U32      : VOP3be_Real_vi <0x1E8>;
@@ -527,18 +596,27 @@ defm V_ADD3_U32 : VOP3_Real_vi <0x1ff>;
 defm V_LSHL_OR_B32 : VOP3_Real_vi <0x200>;
 defm V_AND_OR_B32 : VOP3_Real_vi <0x201>;
 defm V_OR3_B32 : VOP3_Real_vi <0x202>;
-defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
+defm V_PACK_B32_F16 : VOP3OpSel_Real_gfx9 <0x2a0>;
 
 defm V_XAD_U32 : VOP3_Real_vi <0x1f3>;
 
-defm V_MIN3_F16 : VOP3_Real_vi <0x1f4>;
-defm V_MIN3_I16 : VOP3_Real_vi <0x1f5>;
-defm V_MIN3_U16 : VOP3_Real_vi <0x1f6>;
-
-defm V_MAX3_F16 : VOP3_Real_vi <0x1f7>;
-defm V_MAX3_I16 : VOP3_Real_vi <0x1f8>;
-defm V_MAX3_U16 : VOP3_Real_vi <0x1f9>;
-
-defm V_MED3_F16 : VOP3_Real_vi <0x1fa>;
-defm V_MED3_I16 : VOP3_Real_vi <0x1fb>;
-defm V_MED3_U16 : VOP3_Real_vi <0x1fc>;
+defm V_MIN3_F16 : VOP3OpSel_Real_gfx9 <0x1f4>;
+defm V_MIN3_I16 : VOP3OpSel_Real_gfx9 <0x1f5>;
+defm V_MIN3_U16 : VOP3OpSel_Real_gfx9 <0x1f6>;
+
+defm V_MAX3_F16 : VOP3OpSel_Real_gfx9 <0x1f7>;
+defm V_MAX3_I16 : VOP3OpSel_Real_gfx9 <0x1f8>;
+defm V_MAX3_U16 : VOP3OpSel_Real_gfx9 <0x1f9>;
+
+defm V_MED3_F16 : VOP3OpSel_Real_gfx9 <0x1fa>;
+defm V_MED3_I16 : VOP3OpSel_Real_gfx9 <0x1fb>;
+defm V_MED3_U16 : VOP3OpSel_Real_gfx9 <0x1fc>;
+
+defm V_ADD_I16  : VOP3OpSel_Real_gfx9 <0x29e>;
+defm V_SUB_I16  : VOP3OpSel_Real_gfx9 <0x29f>;
+
+defm V_MAD_U32_U16 : VOP3OpSel_Real_gfx9 <0x1f1>;
+defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx9 <0x1f2>;
+
+defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>;
+defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;

Modified: llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td?rev=308740&r1=308739&r2=308740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td Fri Jul 21 06:54:11 2017
@@ -65,8 +65,13 @@ class VOP3Common <dag outs, dag ins, str
 }
 
 class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
-                   bit VOP3Only = 0, bit isVOP3P = 0> :
-  InstSI <P.Outs64, !if(!and(isVOP3P, P.IsPacked), P.InsVOP3P, P.Ins64), "", pattern>,
+                   bit VOP3Only = 0, bit isVOP3P = 0, bit isVop3OpSel = 0> :
+  InstSI <P.Outs64,
+          !if(isVop3OpSel,
+              P.InsVOP3OpSel,
+              !if(!and(isVOP3P, P.IsPacked), P.InsVOP3P, P.Ins64)),
+          "",
+          pattern>,
   VOP <opName>,
   SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
   MnemonicAlias<opName#"_e64", opName> {
@@ -74,9 +79,12 @@ class VOP3_Pseudo <string opName, VOPPro
   let isPseudo = 1;
   let isCodeGenOnly = 1;
   let UseNamedOperandTable = 1;
+  let VOP3_OPSEL = isVop3OpSel;
 
   string Mnemonic = opName;
-  string AsmOperands = !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64);
+  string AsmOperands = !if(isVop3OpSel,
+                           P.AsmVOP3OpSel,
+                           !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64));
 
   let Size = 8;
   let mayLoad = 0;
@@ -146,11 +154,11 @@ class VOP3P_Real<VOP3P_Pseudo ps, int En
   VOP3_Real<ps, EncodingFamily>;
 
 class VOP3a<VOPProfile P> : Enc64 {
-  bits<2> src0_modifiers;
+  bits<4> src0_modifiers;
   bits<9> src0;
-  bits<2> src1_modifiers;
+  bits<3> src1_modifiers;
   bits<9> src1;
-  bits<2> src2_modifiers;
+  bits<3> src2_modifiers;
   bits<9> src2;
   bits<1> clamp;
   bits<2> omod;
@@ -189,6 +197,13 @@ class VOP3e_vi <bits<10> op, VOPProfile
   let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
 }
 
+class VOP3OpSel_gfx9 <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
+  let Inst{11} = !if(P.HasSrc0, src0_modifiers{2}, 0);
+  let Inst{12} = !if(P.HasSrc1, src1_modifiers{2}, 0);
+  let Inst{13} = !if(P.HasSrc2, src2_modifiers{2}, 0);
+  let Inst{14} = !if(P.HasDst,  src0_modifiers{3}, 0);
+}
+
 class VOP3be <VOPProfile P> : Enc64 {
   bits<8> vdst;
   bits<2> src0_modifiers;




More information about the llvm-commits mailing list