[llvm] bb901dc - [AMDGPU][MC][GFX940] Correct disassembly of MFMA opcodes

Dmitry Preobrazhensky via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 1 06:04:06 PDT 2022


Author: Dmitry Preobrazhensky
Date: 2022-08-01T16:00:47+03:00
New Revision: bb901dcc5a5998491df6dbc9a9962812e2c43436

URL: https://github.com/llvm/llvm-project/commit/bb901dcc5a5998491df6dbc9a9962812e2c43436
DIFF: https://github.com/llvm/llvm-project/commit/bb901dcc5a5998491df6dbc9a9962812e2c43436.diff

LOG: [AMDGPU][MC][GFX940] Correct disassembly of MFMA opcodes

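Add a dedicated decoder table for GFX940 MFMA opcodes: the GFX940 MFMA
real instructions move from the "GFX9" decoder namespace to "GFX940", and
the disassembler tries the new 64-bit GFX940 table before the GFX90A table
when the subtarget has FeatureGFX940Insts.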

Differential Revision: https://reviews.llvm.org/D130759

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
    llvm/lib/Target/AMDGPU/VOP3PInstructions.td
    llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 98ee720200b43..3af5d92bb5469 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -560,6 +560,12 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     if (Bytes.size() < 4) break;
     const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
 
+    if (STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts]) {
+      Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address);
+      if (Res)
+        break;
+    }
+
     if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
       Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address);
       if (Res)

diff  --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index b300e1acf81eb..679f6db7453fd 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1004,14 +1004,14 @@ multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(N
                                   VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
                                   VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
   let SubtargetPredicate = isGFX940Plus,
-      AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9",
+      AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940",
       AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
   def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
                     VOP3Pe_MAI <op, PS_ACD.Pfl, 1>;
 
   def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
                     VOP3Pe_MAI <op, PS_VCD.Pfl, 0>;
-  } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9"
+  } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940"
 
   defm : VOP3P_Real_MFMA_gfx940_aliases<Name, PS_ACD.Mnemonic, NAME>;
 

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt
index 048e12a04be16..0742cf1b5c720 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt
@@ -440,3 +440,159 @@
 
 # GFX940: v_smfmac_f32_32x32x32_fp8_fp8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xff,0xd3,0x02,0x09,0x06,0x14]
 0x00,0x80,0xff,0xd3,0x02,0x09,0x06,0x14
+
+# GFX940: v_mfma_f32_16x16x16_f16 v[10:13], v[2:3], v[4:5], v[6:9] ; encoding: [0x0a,0x00,0xcd,0xd3,0x02,0x09,0x1a,0x04]
+0x0a,0x00,0xcd,0xd3,0x02,0x09,0x1a,0x04
+
+# GFX940: v_mfma_f32_16x16x16_f16 v[252:255], a[254:255], v[254:255], v[252:255] ; encoding: [0xfc,0x00,0xcd,0xd3,0xfe,0xfd,0xf3,0x0f]
+0xfc,0x00,0xcd,0xd3,0xfe,0xfd,0xf3,0x0f
+
+# GFX940: v_mfma_f32_16x16x16_f16 v[252:255], v[254:255], a[254:255], v[252:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xfc,0x3a,0xcd,0xd3,0xfe,0xfd,0xf3,0x77]
+0xfc,0x3a,0xcd,0xd3,0xfe,0xfd,0xf3,0x77
+
+# GFX940: v_mfma_f32_16x16x16_f16 a[252:255], a[254:255], a[254:255], a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xcd,0xd3,0xfe,0xfd,0xf3,0xff]
+0xfc,0xff,0xcd,0xd3,0xfe,0xfd,0xf3,0xff
+
+# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], v1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x07]
+0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x07
+
+# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], a1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x0f]
+0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x0f
+
+# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], v1, a2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc1,0xd3,0x01,0x05,0xc2,0x77]
+0xf0,0x3a,0xc1,0xd3,0x01,0x05,0xc2,0x77
+
+# GFX940: v_mfma_f32_16x16x1_4b_f32 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc1,0xd3,0xff,0xff,0xc3,0xff]
+0xf0,0xff,0xc1,0xd3,0xff,0xff,0xc3,0xff
+
+# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], v[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x07]
+0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x07
+
+# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], a[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x0f]
+0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x0f
+
+# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], v[2:3], a[4:5], v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc9,0xd3,0x02,0x09,0xc2,0x77]
+0xf0,0x3a,0xc9,0xd3,0x02,0x09,0xc2,0x77
+
+# GFX940: v_mfma_f32_16x16x4_4b_f16 a[240:255], a[254:255], a[254:255], a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc9,0xd3,0xfe,0xfd,0xc3,0xff]
+0xf0,0xff,0xc9,0xd3,0xfe,0xfd,0xc3,0xff
+
+# GFX940: v_mfma_f32_16x16x4_f32 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xc5,0xd3,0x01,0x05,0x1a,0x04]
+0x0a,0x00,0xc5,0xd3,0x01,0x05,0x1a,0x04
+
+# GFX940: v_mfma_f32_16x16x4_f32 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xc5,0xd3,0xff,0xff,0xf3,0x0f]
+0xfc,0x00,0xc5,0xd3,0xff,0xff,0xf3,0x0f
+
+# GFX940: v_mfma_f32_16x16x4_f32 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xc5,0xd3,0x01,0x05,0x1a,0x74]
+0x0a,0xba,0xc5,0xd3,0x01,0x05,0x1a,0x74
+
+# GFX940: v_mfma_f32_16x16x4_f32 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xc5,0xd3,0xff,0xff,0xf3,0xff]
+0xfc,0xff,0xc5,0xd3,0xff,0xff,0xf3,0xff
+
+# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], v1, v2, v[224:255] ; encoding: [0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x07]
+0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x07
+
+# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], a1, v2, v[224:255] ; encoding: [0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x0f]
+0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x0f
+
+# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], v1, a2, v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xc0,0xd3,0x01,0x05,0x82,0x77]
+0xe0,0x3a,0xc0,0xd3,0x01,0x05,0x82,0x77
+
+# GFX940: v_mfma_f32_32x32x1_2b_f32 a[224:255], a255, a255, a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xc0,0xd3,0xff,0xff,0x83,0xff]
+0xe0,0xff,0xc0,0xd3,0xff,0xff,0x83,0xff
+
+# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], v1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x07]
+0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x07
+
+# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], a1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x0f]
+0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x0f
+
+# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], v1, a2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc4,0xd3,0x01,0x05,0xc2,0x77]
+0xf0,0x3a,0xc4,0xd3,0x01,0x05,0xc2,0x77
+
+# GFX940: v_mfma_f32_32x32x2_f32 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc4,0xd3,0xff,0xff,0xc3,0xff]
+0xf0,0xff,0xc4,0xd3,0xff,0xff,0xc3,0xff
+
+# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], v[2:3], v[4:5], v[224:255] ; encoding: [0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x07]
+0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x07
+
+# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], a[2:3], v[4:5], v[224:255] ; encoding: [0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x0f]
+0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x0f
+
+# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], v[2:3], a[4:5], v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xc8,0xd3,0x02,0x09,0x82,0x77]
+0xe0,0x3a,0xc8,0xd3,0x02,0x09,0x82,0x77
+
+# GFX940: v_mfma_f32_32x32x4_2b_f16 a[224:255], a[254:255], a[254:255], a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xc8,0xd3,0xfe,0xfd,0x83,0xff]
+0xe0,0xff,0xc8,0xd3,0xfe,0xfd,0x83,0xff
+
+# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], v[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x07]
+0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x07
+
+# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], a[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x0f]
+0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x0f
+
+# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], v[2:3], a[4:5], v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xcc,0xd3,0x02,0x09,0xc2,0x77]
+0xf0,0x3a,0xcc,0xd3,0x02,0x09,0xc2,0x77
+
+# GFX940: v_mfma_f32_32x32x8_f16 a[240:255], a[254:255], a[254:255], a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xcc,0xd3,0xfe,0xfd,0xc3,0xff]
+0xf0,0xff,0xcc,0xd3,0xfe,0xfd,0xc3,0xff
+
+# GFX940: v_mfma_f32_4x4x1_16b_f32 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xc2,0xd3,0x01,0x05,0x1a,0x04]
+0x0a,0x00,0xc2,0xd3,0x01,0x05,0x1a,0x04
+
+# GFX940: v_mfma_f32_4x4x1_16b_f32 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xc2,0xd3,0xff,0xff,0xf3,0x0f]
+0xfc,0x00,0xc2,0xd3,0xff,0xff,0xf3,0x0f
+
+# GFX940: v_mfma_f32_4x4x1_16b_f32 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xc2,0xd3,0x01,0x05,0x1a,0x74]
+0x0a,0xba,0xc2,0xd3,0x01,0x05,0x1a,0x74
+
+# GFX940: v_mfma_f32_4x4x1_16b_f32 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xc2,0xd3,0xff,0xff,0xf3,0xff]
+0xfc,0xff,0xc2,0xd3,0xff,0xff,0xf3,0xff
+
+# GFX940: v_mfma_f32_4x4x4_16b_f16 v[10:13], v[2:3], v[4:5], v[6:9] ; encoding: [0x0a,0x00,0xca,0xd3,0x02,0x09,0x1a,0x04]
+0x0a,0x00,0xca,0xd3,0x02,0x09,0x1a,0x04
+
+# GFX940: v_mfma_f32_4x4x4_16b_f16 v[252:255], a[254:255], v[254:255], v[252:255] ; encoding: [0xfc,0x00,0xca,0xd3,0xfe,0xfd,0xf3,0x0f]
+0xfc,0x00,0xca,0xd3,0xfe,0xfd,0xf3,0x0f
+
+# GFX940: v_mfma_f32_4x4x4_16b_f16 a[10:13], v[2:3], a[4:5], a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xca,0xd3,0x02,0x09,0x1a,0x74]
+0x0a,0xba,0xca,0xd3,0x02,0x09,0x1a,0x74
+
+# GFX940: v_mfma_f32_4x4x4_16b_f16 a[252:255], a[254:255], a[254:255], a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xca,0xd3,0xfe,0xfd,0xf3,0xff]
+0xfc,0xff,0xca,0xd3,0xfe,0xfd,0xf3,0xff
+
+# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], a1, a2, v[240:255] ; encoding: [0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x1f]
+0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x1f
+
+# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], v1, a2, v[240:255] ; encoding: [0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x17]
+0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x17
+
+# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], a1, v2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xd1,0xd3,0x01,0x05,0xc2,0x6f]
+0xf0,0x3a,0xd1,0xd3,0x01,0x05,0xc2,0x6f
+
+# GFX940: v_mfma_i32_16x16x4_4b_i8 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xd1,0xd3,0xff,0xff,0xc3,0xff]
+0xf0,0xff,0xd1,0xd3,0xff,0xff,0xc3,0xff
+
+# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], v1, v2, v[224:255] ; encoding: [0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x07]
+0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x07
+
+# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], v1, a2, v[224:255] ; encoding: [0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x17]
+0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x17
+
+# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], a1, v2, v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xd0,0xd3,0x01,0x05,0x82,0x6f]
+0xe0,0x3a,0xd0,0xd3,0x01,0x05,0x82,0x6f
+
+# GFX940: v_mfma_i32_32x32x4_2b_i8 a[224:255], a255, a255, a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xd0,0xd3,0xff,0xff,0x83,0xff]
+0xe0,0xff,0xd0,0xd3,0xff,0xff,0x83,0xff
+
+# GFX940: v_mfma_i32_4x4x4_16b_i8 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xd2,0xd3,0x01,0x05,0x1a,0x04]
+0x0a,0x00,0xd2,0xd3,0x01,0x05,0x1a,0x04
+
+# GFX940: v_mfma_i32_4x4x4_16b_i8 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xd2,0xd3,0xff,0xff,0xf3,0x0f]
+0xfc,0x00,0xd2,0xd3,0xff,0xff,0xf3,0x0f
+
+# GFX940: v_mfma_i32_4x4x4_16b_i8 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xd2,0xd3,0x01,0x05,0x1a,0x74]
+0x0a,0xba,0xd2,0xd3,0x01,0x05,0x1a,0x74
+
+# GFX940: v_mfma_i32_4x4x4_16b_i8 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xd2,0xd3,0xff,0xff,0xf3,0xff]
+0xfc,0xff,0xd2,0xd3,0xff,0xff,0xf3,0xff


        


More information about the llvm-commits mailing list