[llvm] ab04556 - [AMDGPU][MC] Correct v_cndmask operand types

Thu Oct 20 05:10:10 PDT 2022

Author: Dmitry Preobrazhensky
Date: 2022-10-20T15:09:35+03:00
New Revision: ab045561eeb713d30c15271d57533bf27a60fcf8

URL: https://github.com/llvm/llvm-project/commit/ab045561eeb713d30c15271d57533bf27a60fcf8
DIFF: https://github.com/llvm/llvm-project/commit/ab045561eeb713d30c15271d57533bf27a60fcf8.diff

LOG: [AMDGPU][MC] Correct v_cndmask operand types

Differential Revision: https://reviews.llvm.org/D136152

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/test/MC/AMDGPU/gfx10_asm_vop2.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 89ce127933f68..c29e92efd596d 100644

--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1778,7 +1778,7 @@ class isModifierType<ValueType SrcVT> {
 }
 
 // Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
+class getSrcMod <ValueType VT> {
   bit isFP = isFloatType<VT>.ret;
   bit isPacked = isPackedType<VT>.ret;
   Operand ret =  !if(!eq(VT.Size, 64),
@@ -1788,7 +1788,7 @@ class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
                             FP16InputMods,
                             FP32InputMods
                           ),
-                         !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
+                         Int32InputMods)
                      );
 }
 
@@ -1813,14 +1813,14 @@ class getSrcModDPP_t16 <ValueType VT> {
 }
 
 // Return type of input modifiers operand for specified input operand for DPP
-class getSrcModVOP3DPP <ValueType VT, bit EnableF32SrcMods> {
+class getSrcModVOP3DPP <ValueType VT> {
   bit isFP = isFloatType<VT>.ret;
   bit isPacked = isPackedType<VT>.ret;
   Operand ret =
       !if (isFP,
            !if (!eq(VT.Value, f16.Value), FP16VCSrcInputMods,
                 FP32VCSrcInputMods),
-           !if (EnableF32SrcMods, FP32VCSrcInputMods, Int32VCSrcInputMods));
+           Int32VCSrcInputMods);
 }
 
 // Return type of input modifiers operand specified input operand for SDWA
@@ -2429,11 +2429,9 @@ def PatGenMode {
   int Pattern   = 1;
 }
 
-class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
-                  bit _EnableClamp = 0> {
+class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
 
   field list<ValueType> ArgVT = _ArgVT;
-  field bit EnableF32SrcMods = _EnableF32SrcMods;
   field bit EnableClamp = _EnableClamp;
   field bit IsTrue16 = 0;
 
@@ -2459,15 +2457,15 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
   field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
   field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
   field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
-  field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
-  field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
-  field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
+  field Operand Src0Mod = getSrcMod<Src0VT>.ret;
+  field Operand Src1Mod = getSrcMod<Src1VT>.ret;
+  field Operand Src2Mod = getSrcMod<Src2VT>.ret;
   field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
   field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
   field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
   field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret;
   field Operand Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
-  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, EnableF32SrcMods>.ret;
+  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
   field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
   field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
 
@@ -2481,12 +2479,10 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
   field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
   field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);
 
-  // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
   field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
   field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
   field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
 
-  // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
   field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
   field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
   field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
@@ -2507,8 +2503,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
   field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
                                isModifierType<Src1VT>.ret,
                                isModifierType<Src2VT>.ret,
-                               HasOMod,
-                               EnableF32SrcMods);
+                               HasOMod);
 
   field bit HasSrc0Mods = HasModifiers;
   field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
@@ -2652,7 +2647,7 @@ def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
 def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
 def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
 def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
-def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], 0, /*EnableClamp=*/1>;
+def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>;
 
 def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
 def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
@@ -2699,7 +2694,7 @@ def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
 def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
-def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
+def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>;
 def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
 def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
 

diff  --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index c3d66f4582635..5f376b11c8da8 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -505,7 +505,7 @@ def VOP_DOT_ACC_I32_I32   : VOP_DOT_ACC<i32, i32> {
 }
 
 // Write out to vcc or arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/1> {
   let Asm32 = "$vdst, vcc, $src0, $src1";
   let Asm64 = "$vdst, $sdst, $src0, $src1$clamp";
   let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
@@ -532,7 +532,7 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp
 
 // Write out to vcc or arbitrary SGPR and read in from vcc or
 // arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
+def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1> {
   let HasSrc2Mods = 0;
   let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
   let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
@@ -576,7 +576,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*
 }
 
 // Read in from vcc or arbitrary SGPR.
-class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT, /*EnableF32SrcMods=*/1> {
+class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
   let Asm32 = "$vdst, $src0, $src1";
   let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
   let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
@@ -591,14 +591,20 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT, /*EnableF32SrcMods=*
 
   // Suppress src2 implied by type since the 32-bit encoding uses an
   // implicit VCC use.
-  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
+  let Ins32 = (ins VSrc_f32:$src0, Src1RC32:$src1);
+
+  let HasModifiers = 1;
+
+  // Select FP modifiers for VOP3
+  let Src0Mod = !if(!eq(Src0VT.Size, 16), FP16InputMods, FP32InputMods);
+  let Src1Mod = Src0Mod;
 
   let HasSrc0IntMods = 0;
   let HasSrc1IntMods = 0;
   let HasSrc0FloatMods = 1;
   let HasSrc1FloatMods = 1;
-  let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0,
-                     FP32SDWAInputMods:$src1_modifiers, Src1SDWA:$src1,
+  let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, SDWASrc_f32:$src0,
+                     FP32SDWAInputMods:$src1_modifiers, SDWASrc_f32:$src1,
                      clampmod:$clamp,
                      dst_sel:$dst_sel, dst_unused:$dst_unused,
                      src0_sel:$src0_sel, src1_sel:$src1_sel);

diff  --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
index 1393c9e787096..3eda9b0939dca 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
@@ -35,6 +35,10 @@ v_cndmask_b32_e32 v5, -4.0, v2, vcc
 // W64: encoding: [0xf7,0x04,0x0a,0x02]
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
+v_cndmask_b32_e32 v5, |-4.0|, v2, vcc
+// W64: encoding: [0xf6,0x04,0x0a,0x02]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 v_cndmask_b32_e32 v5, v1, v255, vcc
 // W64: encoding: [0x01,0xff,0x0b,0x02]
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -115,6 +119,10 @@ v_cndmask_b32_sdwa v5, |v1|, -v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_s
 // W64: encoding: [0xf9,0x04,0x0a,0x02,0x01,0x06,0x26,0x16]
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
+v_cndmask_b32_sdwa v5, |0.5|, -v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// W64: encoding: [0xf9,0x04,0x0a,0x02,0xf0,0x06,0xa6,0x16]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 v_cndmask_b32_dpp v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0
 // W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x98,0x00]
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -155,6 +163,10 @@ v_cndmask_b32_e32 v5, -4.0, v2, vcc_lo
 // W32: encoding: [0xf7,0x04,0x0a,0x02]
 // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
+v_cndmask_b32_e32 v5, |-4.0|, v2, vcc_lo
+// W32: encoding: [0xf6,0x04,0x0a,0x02]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 v_cndmask_b32_e32 v5, v1, v255, vcc_lo
 // W32: encoding: [0x01,0xff,0x0b,0x02]
 // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -235,6 +247,10 @@ v_cndmask_b32_sdwa v5, |v1|, -v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src
 // W32: encoding: [0xf9,0x04,0x0a,0x02,0x01,0x06,0x26,0x16]
 // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
+v_cndmask_b32_sdwa v5, |0.5|, -v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// W32: encoding: [0xf9,0x04,0x0a,0x02,0xf0,0x06,0xa6,0x16]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 v_cndmask_b32_sdwa v5, sext(v1), v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 // W32-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand
 // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand