[llvm] 32ca9bd - [AMDGPU][MC][GFX940] Correct tied operand decoding for smfmac opcodes

Dmitry Preobrazhensky via llvm-commits llvm-commits at lists.llvm.org
Wed May 18 05:39:52 PDT 2022


Author: Dmitry Preobrazhensky
Date: 2022-05-18T15:39:30+03:00
New Revision: 32ca9bd7b5b83a4bc84ed611e3744f20cf62dba6

URL: https://github.com/llvm/llvm-project/commit/32ca9bd7b5b83a4bc84ed611e3744f20cf62dba6
DIFF: https://github.com/llvm/llvm-project/commit/32ca9bd7b5b83a4bc84ed611e3744f20cf62dba6.diff

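LOG: [AMDGPU][MC][GFX940] Correct tied operand decoding for smfmac opcodes

Summary of the change below: bit 8 of the SMFMAC vdst field is not encoded in
the instruction, so the encoding class VOP3Pe_SMFMAC no longer forces
vdst{8} = 1. Instead, the SMFMAC profiles use new AVDst_128/AVDst_512 operands
whose decoders assert that bit 8 is clear and then set it (IS_VGPR) before
calling decodeSrcOp. The now-unused AVSrc_512 operand and its decoder are
removed, and disassembler tests for the smfmac opcodes are added.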

Differential Revision: https://reviews.llvm.org/D125790

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
    llvm/lib/Target/AMDGPU/SIDefines.h
    llvm/lib/Target/AMDGPU/SIRegisterInfo.td
    llvm/lib/Target/AMDGPU/VOP3PInstructions.td
    llvm/lib/Target/AMDGPU/VOPInstructions.td
    llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index fd5d83b4ed3c5..a9540b14f1ad9 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -148,7 +148,8 @@ DECODE_OPERAND_REG(AReg_1024)
 DECODE_OPERAND_REG(AV_32)
 DECODE_OPERAND_REG(AV_64)
 DECODE_OPERAND_REG(AV_128)
-DECODE_OPERAND_REG(AV_512)
+DECODE_OPERAND_REG(AVDst_128)
+DECODE_OPERAND_REG(AVDst_512)
 
 static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
@@ -972,8 +973,16 @@ MCOperand AMDGPUDisassembler::decodeOperand_AV_128(unsigned Val) const {
   return decodeSrcOp(OPW128, Val);
 }
 
-MCOperand AMDGPUDisassembler::decodeOperand_AV_512(unsigned Val) const {
-  return decodeSrcOp(OPW512, Val);
+MCOperand AMDGPUDisassembler::decodeOperand_AVDst_128(unsigned Val) const {
+  using namespace AMDGPU::EncValues;
+  assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
+  return decodeSrcOp(OPW128, Val | IS_VGPR);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AVDst_512(unsigned Val) const {
+  using namespace AMDGPU::EncValues;
+  assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
+  return decodeSrcOp(OPW512, Val | IS_VGPR);
 }
 
 MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {

diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 6e83da88f656e..5e44fecf43c11 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -142,7 +142,8 @@ class AMDGPUDisassembler : public MCDisassembler {
   MCOperand decodeOperand_AV_32(unsigned Val) const;
   MCOperand decodeOperand_AV_64(unsigned Val) const;
   MCOperand decodeOperand_AV_128(unsigned Val) const;
-  MCOperand decodeOperand_AV_512(unsigned Val) const;
+  MCOperand decodeOperand_AVDst_128(unsigned Val) const;
+  MCOperand decodeOperand_AVDst_512(unsigned Val) const;
 
   enum OpWidthTy {
     OPW32,

diff  --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 674fbd20ab16f..3cef4597de4cf 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -280,7 +280,8 @@ enum : unsigned {
   INLINE_FLOATING_C_MAX = 248,
   LITERAL_CONST = 255,
   VGPR_MIN = 256,
-  VGPR_MAX = 511
+  VGPR_MAX = 511,
+  IS_VGPR = 256  // Indicates VGPR or AGPR
 };
 
 } // namespace EncValues

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index e2aa0143328dd..2cb6ae3c884aa 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1097,7 +1097,7 @@ defm VISrc_512  : RegInlineOperandAC<"VReg", "VISrc_512",  "_512">;
 defm VISrc_1024 : RegInlineOperandAC<"VReg", "VISrc_1024", "_1024">;
 
 //===----------------------------------------------------------------------===//
-//  AVSrc_* Operands with an AGPR or VGPR
+//  AVSrc_*, AVDst_*, AVLdSt_* Operands with an AGPR or VGPR
 //===----------------------------------------------------------------------===//
 
 def AVSrc_32 : RegisterOperand<AV_32> {
@@ -1115,8 +1115,13 @@ def AVSrc_128 : RegisterOperand<AV_128> {
   let EncoderMethod = "getAVOperandEncoding";
 }
 
-def AVSrc_512 : RegisterOperand<AV_512> {
-  let DecoderMethod = "DecodeAV_512RegisterClass";
+def AVDst_128 : RegisterOperand<AV_128> {
+  let DecoderMethod = "DecodeAVDst_128RegisterClass";
+  let EncoderMethod = "getAVOperandEncoding";
+}
+
+def AVDst_512 : RegisterOperand<AV_512> {
+  let DecoderMethod = "DecodeAVDst_512RegisterClass";
   let EncoderMethod = "getAVOperandEncoding";
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 5599276df86aa..1c670658f06fa 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -449,12 +449,12 @@ def VOPProfileMAI_I32_I64_X32_VCD    : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32,
 def VOPProfileMAI_F32_V2F32_X16_VCD  : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32,   VISrc_128_b32,  VDst_128,  AVSrc_64>;
 def VOPProfileMAI_F32_V2F32_X32_VCD  : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, VISrc_512_b32,  VDst_512,  AVSrc_64>;
 
-def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32,  AVSrc_128, AVSrc_64, AVSrc_128>;
-def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVSrc_512, AVSrc_64, AVSrc_128>;
-def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I32,  AVSrc_128, AVSrc_64, AVSrc_128>;
-def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVSrc_512, AVSrc_64, AVSrc_128>;
-def VOPProfileSMFMAC_I32_16X16X64_I8  : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32,  AVSrc_128, AVSrc_64, AVSrc_128>;
-def VOPProfileSMFMAC_I32_32X32X32_I8  : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVSrc_512, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32,  AVDst_128, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I32,  AVDst_128, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_I32_16X16X64_I8  : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32,  AVDst_128, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_I32_32X32X32_I8  : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>;
 
 class MFMATable <bit is_mac, string Name> {
   bit IsMac = is_mac;

diff  --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 0f74a4310b38f..bb92333304c23 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -373,7 +373,7 @@ class VOP3Pe_MAI <bits<7> op, VOPProfile P, bit acc_cd = 0> : Enc64 {
 }
 
 class VOP3Pe_SMFMAC <bits<7> op> : Enc64 {
-  bits<10> vdst;
+  bits<10> vdst; // VGPR or AGPR, but not SGPR. vdst{8} is not encoded in the instruction.
   bits<10> src0;
   bits<10> src1;
   bits<9> idx;
@@ -381,7 +381,6 @@ class VOP3Pe_SMFMAC <bits<7> op> : Enc64 {
   bits<3> cbsz;
   bits<4> abid;
 
-  let vdst{8} = 1; // VGPR or AGPR, but not SGPR
   let blgp = 0;
 
   let Inst{7-0} = vdst{7-0};

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt
index 9c339157c12ac..bd3a28042027b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt
@@ -69,18 +69,126 @@
 # GFX940: v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 ; encoding: [0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14]
 0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14
 
+# GFX940: v_smfmac_f32_16x16x32_f16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x00,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x80,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xe2,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
 # GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c]
 0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c
 
 # GFX940: v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 ; encoding: [0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14]
 0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14
 
+# GFX940: v_smfmac_f32_32x32x16_f16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x00,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xe4,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
 # GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c]
 0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c
 
 # GFX940: v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 ; encoding: [0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14]
 0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14
 
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x00,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x80,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xe6,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
 # GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c]
 0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c
 
@@ -93,14 +201,122 @@
 # GFX940: v_smfmac_f32_32x32x16_bf16 a[10:25], v[2:3], a[4:7], v9 ; encoding: [0x0a,0x80,0xe8,0xd3,0x02,0x09,0x26,0x14]
 0x0a,0x80,0xe8,0xd3,0x02,0x09,0x26,0x14
 
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x00,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x80,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xe8,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
 # GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v10 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xea,0xd3,0x02,0x09,0x2a,0x0c]
 0x0a,0x0b,0xea,0xd3,0x02,0x09,0x2a,0x0c
 
 # GFX940: v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v11 ; encoding: [0x0a,0x80,0xea,0xd3,0x02,0x09,0x2e,0x14]
 0x0a,0x80,0xea,0xd3,0x02,0x09,0x2e,0x14
 
+# GFX940: v_smfmac_i32_16x16x64_i8 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x00,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x80,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xea,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xea,0xd3,0x02,0x09,0x0e,0x04
+
 # GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v12 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xec,0xd3,0x02,0x09,0x32,0x0c]
 0x0a,0x0b,0xec,0xd3,0x02,0x09,0x32,0x0c
 
 # GFX940: v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v13 ; encoding: [0x0a,0x80,0xec,0xd3,0x02,0x09,0x36,0x14]
 0x0a,0x80,0xec,0xd3,0x02,0x09,0x36,0x14
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x00,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x80,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xec,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xec,0xd3,0x02,0x09,0x0e,0x04


        


More information about the llvm-commits mailing list