[llvm] r330123 - [AMDGPU][MC][VI][GFX9] Added support of SDWA/DPP for v_cndmask_b32
Dmitry Preobrazhensky via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 16 05:41:39 PDT 2018
Author: dpreobra
Date: Mon Apr 16 05:41:38 2018
New Revision: 330123
URL: http://llvm.org/viewvc/llvm-project?rev=330123&view=rev
Log:
[AMDGPU][MC][VI][GFX9] Added support of SDWA/DPP for v_cndmask_b32
See bug 36356: https://bugs.llvm.org/show_bug.cgi?id=36356
Differential Revision: https://reviews.llvm.org/D45446
Reviewers: artem.tamazov, arsenm, timcorringham
Modified:
llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
llvm/trunk/test/MC/AMDGPU/vop_dpp.s
llvm/trunk/test/MC/AMDGPU/vop_sdwa.s
llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt
llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt
llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt
Modified: llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp Mon Apr 16 05:41:38 2018
@@ -890,6 +890,10 @@ bool SIPeepholeSDWA::isConvertibleToSDWA
Opc == AMDGPU::V_MAC_F32_e32))
return false;
+ // FIXME: has SDWA but requires handling of implicit VCC use
+ if (Opc == AMDGPU::V_CNDMASK_B32_e32)
+ return false;
+
return true;
}
Modified: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td Mon Apr 16 05:41:38 2018
@@ -168,6 +168,10 @@ multiclass VOP2eInst <string opName,
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
+ let AsmMatchConverter = "cvtSdwaVOP2b";
+ }
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -294,12 +298,30 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i
let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
let Asm32 = "$vdst, $src0, $src1, vcc";
let Asm64 = "$vdst, $src0, $src1, $src2";
+ let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst);
// Suppress src2 implied by type since the 32-bit encoding uses an
// implicit VCC use.
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
+
+ let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+ Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
+ clampmod:$clamp,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel);
+
+ let InsDPP = (ins DstRCDPP:$old,
+ Src0DPP:$src0,
+ Src1DPP:$src1,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let HasExt = 1;
+ let HasSDWA9 = 1;
}
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
@@ -820,7 +842,7 @@ multiclass VOP2_Real_e32e64_vi <bits<6>
def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
}
-defm V_CNDMASK_B32 : Base_VOP2_Real_e32e64_vi <0x0>;
+defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>;
Modified: llvm/trunk/test/MC/AMDGPU/vop_dpp.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop_dpp.s?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop_dpp.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop_dpp.s Mon Apr 16 05:41:38 2018
@@ -588,6 +588,14 @@ v_subb_co_u32 v1, vcc, v2, v3, vcc row_s
// GFX9: v_subbrev_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
v_subbrev_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+// NOSICI: error
+// VI9: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0xe4,0x00,0x00]
+v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+
+// NOSICI: error
+// VI9: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0x0f,0x01,0x00]
+v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0
+
//===----------------------------------------------------------------------===//
// Check that immideates and scalar regs are not supported
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/test/MC/AMDGPU/vop_sdwa.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop_sdwa.s?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop_sdwa.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop_sdwa.s Mon Apr 16 05:41:38 2018
@@ -581,6 +581,20 @@ v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc
// GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// NOSICI: error
+// GFX89: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06]
+v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+
+// NOSICI: error
+// NOVI: error
+// GFX9: v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0xc1,0x06,0x86,0x06]
+v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+
+// NOSICI: error
+// NOVI: error
+// GFX9: v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x06,0x06,0x0e]
+v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+
//===----------------------------------------------------------------------===//
// Check VOPC opcodes
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt (original)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt Mon Apr 16 05:41:38 2018
@@ -91,4 +91,10 @@
0xfa 0xe4 0x98 0x2c 0x4c 0x4e 0x00 0xff
# VI: v_mac_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x46,0x02,0x01,0x09,0xa1]
-0xfa 0x06 0x02 0x46 0x02 0x01 0x09 0xa1
\ No newline at end of file
+0xfa 0x06 0x02 0x46 0x02 0x01 0x09 0xa1
+
+# VI: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0xe4,0x00,0x00]
+0xfa,0x04,0x0a,0x00,0x01,0xe4,0x00,0x00
+
+# VI: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0x0f,0x01,0x00]
+0xfa,0x04,0x0a,0x00,0x01,0x0f,0x01,0x00
Modified: llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt (original)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt Mon Apr 16 05:41:38 2018
@@ -393,6 +393,15 @@
# GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
0xf9 0x06 0x02 0x3c 0x02 0x06 0x05 0x02
+# GFX9: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06]
+0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06
+
+# GFX9: v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0xc1,0x06,0x86,0x06]
+0xf9,0x04,0x0a,0x00,0xc1,0x06,0x86,0x06
+
+# GFX9: v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x06,0x06,0x0e]
+0xf9,0x04,0x0a,0x00,0x01,0x06,0x06,0x0e
+
#-----------------------------------------------------------------------------#
# VOPC
#-----------------------------------------------------------------------------#
Modified: llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt?rev=330123&r1=330122&r2=330123&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt (original)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt Mon Apr 16 05:41:38 2018
@@ -365,3 +365,6 @@
# VI: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
0xf9 0x06 0x02 0x3c 0x02 0x06 0x05 0x02
+
+# VI: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06]
+0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06
More information about the llvm-commits
mailing list