[PATCH] D129084: [AMDGPU] gfx11 Fix disassembler for VOP3 dpp8

Mon Jul 4 07:49:15 PDT 2022

Petar.Avramovic created this revision.
Petar.Avramovic added reviewers: Joe_Nash, rampitec, dp, foad.
Herald added subscribers: kosarev, jsilvanus, kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm.
Herald added a project: All.
Petar.Avramovic requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

Fix disassembler for v_dot2_f16_f16_e64_dpp and v_dot2_bf16_bf16_e64_dpp.
The src1_modifiers operand was missing for v_dot2_bf16_bf16 because of the
type calculations in the td file.
Since op_sel is not the last operand for the dpp8 versions of the
instructions, we have to add it manually so that the remaining operands,
dpp8 and fi, are at the correct operand index.
Explanation: op_sel is a dummy operand placed after the last src operand
(src2); it is used to call the helpers for parsing/printing of
op_sel[a, b, c]. The helpers use src<N>_modifiers for storing the actual
op_sel values. op_sel is not used in the encoding, so the auto-generated
disassembler does not know about it. The value inside the op_sel operand is
not accessed by the parser/printer, so when op_sel is the last operand we can
get away with not adding it to the MCInst. But when there are more operands
after op_sel (in this case dpp8 and fi), we have to add it to the MCInst
manually so that the later operands are at the correct operand index.


https://reviews.llvm.org/D129084

Files:
  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
  llvm/lib/Target/AMDGPU/VOP3Instructions.td
  llvm/test/MC/AMDGPU/gfx11_asm_dpp.s
  llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt


Index: llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
===================================================================

--- llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
+++ llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
@@ -47740,6 +47740,12 @@
 # GFX11: v_fma_mixlo_f16 v0, |v1|, -v2, |v3|     ; encoding: [0x00,0x05,0x21,0xcc,0x01,0x05,0x0e,0x44]
 0x00,0x05,0x21,0xcc,0x01,0x05,0x0e,0x44
 
+# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
+
+# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
+
 # GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x00,0x05,0x13,0xcc,0xe9,0x04,0x0e,0xc4,0x01,0x77,0x39,0x05]
 0x00,0x05,0x13,0xcc,0xe9,0x04,0x0e,0xc4,0x01,0x77,0x39,0x05
 
Index: llvm/test/MC/AMDGPU/gfx11_asm_dpp.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx11_asm_dpp.s
+++ llvm/test/MC/AMDGPU/gfx11_asm_dpp.s
@@ -70,6 +70,12 @@
 v_fma_f32_e64_dpp v80, v81, abs(v82), v81 dpp8:[0,1,6,3,4,5,6,7]
 // GFX11: encoding: [0x50,0x02,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa]
 
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX11: encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
 v_max3_f32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
 
Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -699,10 +699,12 @@
 }
 
 class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> {
-  // FIXME VOP3 DPP versions are unsupported
-  let HasExtVOP3DPP = 0;
   let HasClamp = 0;
   let HasOMod = 0;
+  // Type calculations workaround for bf16.
+  let HasSrc0Mods = 1;
+  let HasSrc1Mods = 1;
+  let HasSrc2Mods = 1;
   let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
                                 NumSrcArgs, HasClamp, HasOMod,
                                 !if(isFloatType<Src0VT>.ret, FPVRegInputMods, IntOpSelMods),
@@ -848,9 +850,8 @@
 defm V_MINMAX_U32          : VOP3_Realtriple_gfx11<0x263>;
 defm V_MAXMIN_I32          : VOP3_Realtriple_gfx11<0x264>;
 defm V_MINMAX_I32          : VOP3_Realtriple_gfx11<0x265>;
-// FIXME VOP3 DPP Dot instructions are unsupported
-defm V_DOT2_F16_F16        : VOP3_Real_Base_gfx11<0x266>;
-defm V_DOT2_BF16_BF16      : VOP3_Real_Base_gfx11<0x267>;
+defm V_DOT2_F16_F16        : VOP3_Realtriple_gfx11<0x266>;
+defm V_DOT2_BF16_BF16      : VOP3_Realtriple_gfx11<0x267>;
 defm V_DIV_SCALE_F32       : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
 defm V_DIV_SCALE_F64       : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
 defm V_MAD_U64_U32_gfx11   : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
Index: llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -755,6 +755,12 @@
   } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
              AMDGPU::isVOPC64DPP(Opc)) {
     convertVOPCDPPInst(MI);
+  } else if(MCII->get(Opc).TSFlags & SIInstrFlags::VOP3) {
+    // Since op_sel is not last operand, we have to add it manually so that
+    // remaining operands, dpp8 and fi, are at correct operand index.
+    if (MI.getNumOperands() < DescNumOps &&
+        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1)
+      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
   } else {
     // Insert dummy unused src modifiers.
     if (MI.getNumOperands() < DescNumOps &&


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D129084.442099.patch
Type: text/x-patch
Size: 4364 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220704/633ff00b/attachment.bin>