[llvm] r247074 - AMDGPU/SI: Fix input vcc operand for VOP2b instructions

Tue Sep 8 14:15:00 PDT 2015

Author: arsenm
Date: Tue Sep  8 16:15:00 2015
New Revision: 247074

URL: http://llvm.org/viewvc/llvm-project?rev=247074&view=rev
Log:
AMDGPU/SI: Fix input vcc operand for VOP2b instructions

Adds vcc to output string input for e32. Allows option
of using e64 encoding with assembler.

Also fixes these instructions not implicitly reading exec.

Modified:
    llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
    llvm/trunk/test/MC/AMDGPU/vop2-err.s
    llvm/trunk/test/MC/AMDGPU/vop2.s

Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=247074&r1=247073&r2=247074&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Tue Sep  8 16:15:00 2015
@@ -215,6 +215,10 @@ public:
            (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
   }
 
+  bool isSCSrc64() const {
+    return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm();
+  }
+
   bool isVCSrc32() const {
     return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
   }

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=247074&r1=247073&r2=247074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Tue Sep  8 16:15:00 2015
@@ -990,7 +990,14 @@ class getVOPSrc1ForVT<ValueType VT> {
 // Returns the register class to use for sources of VOP3 instructions for the
 // given VT.
 class getVOP3SrcForVT<ValueType VT> {
-  RegisterOperand ret = !if(!eq(VT.Size, 64), VCSrc_64, VCSrc_32);
+  RegisterOperand ret =
+  !if(!eq(VT.Size, 64),
+      VCSrc_64,
+      !if(!eq(VT.Value, i1.Value),
+          SCSrc_64,
+          VCSrc_32
+       )
+    );
 }
 
 // Returns 1 if the source arguments have modifiers, 0 if they do not.
@@ -1070,7 +1077,6 @@ class getAsm64 <int NumSrcArgs, bit HasM
       "$dst, "#src0#src1#src2#"$clamp"#"$omod");
 }
 
-
 class VOPProfile <list<ValueType> _ArgVT> {
 
   field list<ValueType> ArgVT = _ArgVT;
@@ -1132,17 +1138,26 @@ def VOP_I32_F32_F32 : VOPProfile <[i32,
 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
 
-class VOP2b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, untyped]> {
+// Write out to vcc or arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
   let Asm32 = "$dst, vcc, $src0, $src1";
   let Asm64 = "$dst, $sdst, $src0, $src1";
   let Outs32 = (outs DstRC:$dst);
   let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
 }
 
-def VOP2b_I32_I1_I32_I32 : VOP2b_Profile<i32>;
-
-def VOP2b_I32_I1_I32_I32_VCC : VOP2b_Profile<i32> {
+// Write out to vcc or arbitrary SGPR and read in from vcc or
+// arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
   let Src0RC32 = VCSrc_32;
+  let Asm32 = "$dst, vcc, $src0, $src1, vcc";
+  let Asm64 = "$dst, $sdst, $src0, $src1, $src2";
+  let Outs32 = (outs DstRC:$dst);
+  let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
+
+  // Suppress src2 implied by type since the 32-bit encoding uses an
+  // implicit VCC use.
+  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
 }
 
 // VOPC instructions are a special case because for the 32-bit
@@ -1429,32 +1444,19 @@ multiclass VOP3SI_2_m <vop op, dag outs,
   // No VI instruction. This class is for SI only.
 }
 
-// XXX - Is v_div_scale_{f32|f64} only available in vop3b without
-// option of implicit vcc use?
-multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
-                      list<dag> pattern, string opName, string revOp,
-                      bit HasMods = 1, bit UseFullOp = 0> {
-  def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
-           VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
-  def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
-            VOP3DisableFields<1, 0, HasMods>;
-
-  def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
-            VOP3DisableFields<1, 0, HasMods>;
-}
-
-multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
-                      list<dag> pattern, string opName, string revOp,
-                      bit HasMods = 1, bit UseFullOp = 0> {
+// Two operand VOP3b instruction that may have a 3rd SGPR bool operand
+// instead of an implicit VCC as in the VOP2b format.
+multiclass VOP3b_2_3_m <vop op, dag outs, dag ins, string asm,
+                        list<dag> pattern, string opName, string revOp,
+                        bit HasMods = 1, bit useSGPRInput = 0,
+                        bit UseFullOp = 0> {
   def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
 
-
   def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
-            VOP3DisableFields<1, 1, HasMods>;
+            VOP3DisableFields<1, useSGPRInput, HasMods>;
 
   def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
-            VOP3DisableFields<1, 1, HasMods>;
+            VOP3DisableFields<1, useSGPRInput, HasMods>;
 }
 
 multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
@@ -1575,12 +1577,14 @@ multiclass VOP2InstSI <vop2 op, string o
 multiclass VOP2b_Helper <vop2 op, string opName, dag outs32, dag outs64,
                          dag ins32, string asm32, list<dag> pat32,
                          dag ins64, string asm64, list<dag> pat64,
-                         string revOp, bit HasMods> {
+                         string revOp, bit HasMods, bit useSGPRInput> {
 
-  defm _e32 : VOP2_m <op, outs32, ins32, asm32, pat32, opName, revOp>;
+  let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
+    defm _e32 : VOP2_m <op, outs32, ins32, asm32, pat32, opName, revOp>;
+  }
 
-  defm _e64 : VOP3b_2_m <op,
-    outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods
+  defm _e64 : VOP3b_2_3_m <op,
+    outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods, useSGPRInput
   >;
 }
 
@@ -1596,7 +1600,7 @@ multiclass VOP2bInst <vop2 op, string op
                                       i1:$clamp, i32:$omod)),
                  (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
       [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
-  revOp, P.HasModifiers
+  revOp, P.HasModifiers, !eq(P.NumSrcArgs, 3)
 >;
 
 // A VOP2 instruction that is VOP3-only on VI.
@@ -1847,14 +1851,14 @@ multiclass VOP3_VCC_Inst <vop3 op, strin
 
 multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
                     string opName, list<dag> pattern> :
-  VOP3b_3_m <
+  VOP3b_2_3_m <
   op, (outs vrc:$vdst, SReg_64:$sdst),
       (ins InputModsNoDefault:$src0_modifiers, arc:$src0,
            InputModsNoDefault:$src1_modifiers, arc:$src1,
            InputModsNoDefault:$src2_modifiers, arc:$src2,
            ClampMod:$clamp, omod:$omod),
   opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern,
-  opName, opName, 1, 1
+  opName, opName, 1, 0, 1
 >;
 
 multiclass VOP3b_64 <vop3 op, string opName, list<dag> pattern> :

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=247074&r1=247073&r2=247074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Tue Sep  8 16:15:00 2015
@@ -1507,7 +1507,7 @@ let isCommutable = 1 in {
 defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">;
 } // End isCommutable = 1
 
-let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
+let isCommutable = 1 in {
 // No patterns so that the scalar instructions are always selected.
 // The scalar versions will be replaced with vector when needed later.
 
@@ -1522,19 +1522,17 @@ defm V_SUBREV_I32 : VOP2bInst <vop2<0x27
   VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
 >;
 
-let Uses = [VCC] in { // Carry-in comes from VCC
 defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
-  VOP2b_I32_I1_I32_I32_VCC
+  VOP2b_I32_I1_I32_I32_I1
 >;
 defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
-  VOP2b_I32_I1_I32_I32_VCC
+  VOP2b_I32_I1_I32_I32_I1
 >;
 defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
-  VOP2b_I32_I1_I32_I32_VCC, null_frag, "v_subb_u32"
+  VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32"
 >;
 
-} // End Uses = [VCC]
-} // End isCommutable = 1, Defs = [VCC]
+} // End isCommutable = 1
 
 defm V_READLANE_B32 : VOP2SI_3VI_m <
   vop3 <0x001, 0x289>,

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td?rev=247074&r1=247073&r2=247074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td Tue Sep  8 16:15:00 2015
@@ -281,3 +281,13 @@ def VCSrc_64 : RegisterOperand<VS_64> {
   let OperandType = "OPERAND_REG_INLINE_C";
   let ParserMatchClass = RegImmMatcher<"VCSrc64">;
 }
+
+//===----------------------------------------------------------------------===//
+//  SCSrc_* Operands with an SGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+def SCSrc_64 : RegisterOperand<SReg_64> {
+  let OperandNamespace = "AMDGPU";
+  let OperandType = "OPERAND_REG_INLINE_C";
+  let ParserMatchClass = RegImmMatcher<"SCSrc64">;
+}

Modified: llvm/trunk/test/MC/AMDGPU/vop2-err.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop2-err.s?rev=247074&r1=247073&r2=247074&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop2-err.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop2-err.s Tue Sep  8 16:15:00 2015
@@ -35,4 +35,28 @@ v_mul_i32_i24_e64 v1, v2, 100
 v_add_i32_e32 v1, s[0:1], v2, v3
 // CHECK: error: invalid operand for instruction
 
+v_addc_u32_e32 v1, vcc, v2, v3, s[2:3]
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, s[0:1], v2, v3, s[2:3]
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, vcc, v2, v3, -1
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, vcc, v2, v3, 123
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, vcc, v2, v3, s0
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, -1, v2, v3, s0
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e64 v1, s[0:1], v2, v3, 123
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32 v1, s[0:1], v2, v3, 123
+// CHECK: error: invalid operand for instruction
+
 // TODO: Constant bus restrictions

Modified: llvm/trunk/test/MC/AMDGPU/vop2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop2.s?rev=247074&r1=247073&r2=247074&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop2.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop2.s Tue Sep  8 16:15:00 2015
@@ -307,29 +307,54 @@ v_subrev_u32 v1, vcc, v2, v3
 // VI:   v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
 v_subrev_u32 v1, s[0:1], v2, v3
 
-// SICI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
-// VI:   v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
-v_addc_u32 v1, vcc, v2, v3
-
-// SICI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
-// VI:   v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x02,0x00]
-v_addc_u32 v1, s[0:1], v2, v3
-
-// SICI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
-// VI:   v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
-v_subb_u32 v1, vcc, v2, v3
-
-// SICI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0x02,0x00]
-// VI:   v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0x02,0x00]
-v_subb_u32 v1, s[0:1], v2, v3
-
-// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
-// VI:   v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
-v_subbrev_u32 v1, vcc, v2, v3
-
-// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0x02,0x00]
-// VI:   v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0x02,0x00]
-v_subbrev_u32 v1, s[0:1], v2, v3
+// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
+// VI:   v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
+v_addc_u32 v1, vcc, v2, v3, vcc
+
+// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
+// VI:   v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
+v_addc_u32_e32 v1, vcc, v2, v3, vcc
+
+
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0xaa,0x01]
+v_addc_u32 v1, s[0:1], v2, v3, vcc
+
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00]
+v_addc_u32 v1, s[0:1], v2, v3, s[2:3]
+
+// SI: 	v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00]
+v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3]
+
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x06,0x03]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x06,0x03]
+v_addc_u32_e64 v1, s[0:1], v2, v3, -1
+
+// SI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0x06,0x03]
+// VI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0x06,0x03]
+v_addc_u32_e64 v1, vcc, v2, v3, -1
+
+// SI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0xaa,0x01]
+v_addc_u32_e64 v1, vcc, v2, v3, vcc
+
+// SI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x52]
+// VI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3a]
+v_subb_u32 v1, vcc, v2, v3, vcc
+
+// SI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0xaa,0x01]
+v_subb_u32 v1, s[0:1], v2, v3, vcc
+
+// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x54]
+// VI:   v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3c]
+v_subbrev_u32 v1, vcc, v2, v3, vcc
+
+// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0xaa,0x01]
+v_subbrev_u32 v1, s[0:1], v2, v3, vcc
 
 // SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
 // VI:   v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]