[llvm] r330123 - [AMDGPU][MC][VI][GFX9] Added support of SDWA/DPP for v_cndmask_b32

Dmitry Preobrazhensky via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 16 05:41:39 PDT 2018


Author: dpreobra
Date: Mon Apr 16 05:41:38 2018
New Revision: 330123

URL: http://llvm.org/viewvc/llvm-project?rev=330123&view=rev
Log:
[AMDGPU][MC][VI][GFX9] Added support of SDWA/DPP for v_cndmask_b32

See bug 36356: https://bugs.llvm.org/show_bug.cgi?id=36356

Differential Revision: https://reviews.llvm.org/D45446

Reviewers: artem.tamazov, arsenm, timcorringham

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
    llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/trunk/test/MC/AMDGPU/vop_dpp.s
    llvm/trunk/test/MC/AMDGPU/vop_sdwa.s
    llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt
    llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt
    llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt

Modified: llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp Mon Apr 16 05:41:38 2018
@@ -890,6 +890,10 @@ bool SIPeepholeSDWA::isConvertibleToSDWA
                            Opc == AMDGPU::V_MAC_F32_e32))
     return false;
 
+  // FIXME: has SDWA but require handling of implicit VCC use
+  if (Opc == AMDGPU::V_CNDMASK_B32_e32)
+    return false;
+
   return true;
 }
 

Modified: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td Mon Apr 16 05:41:38 2018
@@ -168,6 +168,10 @@ multiclass VOP2eInst <string opName,
     let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
       def _e32 : VOP2_Pseudo <opName, P>,
                  Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+      def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
+        let AsmMatchConverter = "cvtSdwaVOP2b";
+      }
     }
 
     def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -294,12 +298,30 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i
   let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
   let Asm32 = "$vdst, $src0, $src1, vcc";
   let Asm64 = "$vdst, $src0, $src1, $src2";
+  let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+  let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+  let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+
   let Outs32 = (outs DstRC:$vdst);
   let Outs64 = (outs DstRC:$vdst);
 
   // Suppress src2 implied by type since the 32-bit encoding uses an
   // implicit VCC use.
   let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
+
+  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
+                     clampmod:$clamp,
+                     dst_sel:$dst_sel, dst_unused:$dst_unused,
+                     src0_sel:$src0_sel, src1_sel:$src1_sel);
+
+  let InsDPP = (ins DstRCDPP:$old,
+                    Src0DPP:$src0,
+                    Src1DPP:$src1,
+                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+  let HasExt = 1;
+  let HasSDWA9 = 1;
 }
 
 def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
@@ -820,7 +842,7 @@ multiclass VOP2_Real_e32e64_vi <bits<6>
   def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
 }
 
-defm V_CNDMASK_B32        : Base_VOP2_Real_e32e64_vi <0x0>;
+defm V_CNDMASK_B32        : VOP2_Real_e32e64_vi <0x0>;
 defm V_ADD_F32            : VOP2_Real_e32e64_vi <0x1>;
 defm V_SUB_F32            : VOP2_Real_e32e64_vi <0x2>;
 defm V_SUBREV_F32         : VOP2_Real_e32e64_vi <0x3>;

Modified: llvm/trunk/test/MC/AMDGPU/vop_dpp.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop_dpp.s?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop_dpp.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop_dpp.s Mon Apr 16 05:41:38 2018
@@ -588,6 +588,14 @@ v_subb_co_u32 v1, vcc, v2, v3, vcc row_s
 // GFX9: v_subbrev_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
 v_subbrev_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
+// NOSICI: error
+// VI9:    v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0xe4,0x00,0x00]
+v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+
+// NOSICI: error
+// VI9:    v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0x0f,0x01,0x00]
+v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0
+
 //===----------------------------------------------------------------------===//
 // Check that immideates and scalar regs are not supported
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/test/MC/AMDGPU/vop_sdwa.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop_sdwa.s?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop_sdwa.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop_sdwa.s Mon Apr 16 05:41:38 2018
@@ -581,6 +581,20 @@ v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc
 // GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
 v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
 
+// NOSICI: error
+// GFX89:  v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06]
+v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+
+// NOSICI: error
+// NOVI:   error
+// GFX9:   v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0xc1,0x06,0x86,0x06]
+v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+
+// NOSICI: error
+// NOVI:   error
+// GFX9:   v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x06,0x06,0x0e]
+v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+
 //===----------------------------------------------------------------------===//
 // Check VOPC opcodes
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt (original)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt Mon Apr 16 05:41:38 2018
@@ -91,4 +91,10 @@
 0xfa 0xe4 0x98 0x2c 0x4c 0x4e 0x00 0xff
 
 # VI: v_mac_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x46,0x02,0x01,0x09,0xa1]
-0xfa 0x06 0x02 0x46 0x02 0x01 0x09 0xa1
\ No newline at end of file
+0xfa 0x06 0x02 0x46 0x02 0x01 0x09 0xa1
+
+# VI: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0xe4,0x00,0x00]
+0xfa,0x04,0x0a,0x00,0x01,0xe4,0x00,0x00
+
+# VI: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0x0f,0x01,0x00]
+0xfa,0x04,0x0a,0x00,0x01,0x0f,0x01,0x00

Modified: llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt (original)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt Mon Apr 16 05:41:38 2018
@@ -393,6 +393,15 @@
 # GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc  dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
 0xf9 0x06 0x02 0x3c 0x02 0x06 0x05 0x02
 
+# GFX9: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06]
+0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06
+
+# GFX9: v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0xc1,0x06,0x86,0x06]
+0xf9,0x04,0x0a,0x00,0xc1,0x06,0x86,0x06
+
+# GFX9: v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x06,0x06,0x0e]
+0xf9,0x04,0x0a,0x00,0x01,0x06,0x06,0x0e
+
 #-----------------------------------------------------------------------------#
 # VOPC
 #-----------------------------------------------------------------------------#

Modified: llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt (original)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt Mon Apr 16 05:41:38 2018
@@ -365,3 +365,6 @@
 
 # VI: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc  dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
 0xf9 0x06 0x02 0x3c 0x02 0x06 0x05 0x02
+
+# VI: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06]
+0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06




More information about the llvm-commits mailing list