[llvm] 32ca9bd - [AMDGPU][MC][GFX940] Correct tied operand decoding for smfmac opcodes
Dmitry Preobrazhensky via llvm-commits
llvm-commits at lists.llvm.org
Wed May 18 05:39:52 PDT 2022
Author: Dmitry Preobrazhensky
Date: 2022-05-18T15:39:30+03:00
New Revision: 32ca9bd7b5b83a4bc84ed611e3744f20cf62dba6
URL: https://github.com/llvm/llvm-project/commit/32ca9bd7b5b83a4bc84ed611e3744f20cf62dba6
DIFF: https://github.com/llvm/llvm-project/commit/32ca9bd7b5b83a4bc84ed611e3744f20cf62dba6.diff
LOG: [AMDGPU][MC][GFX940] Correct tied operand decoding for smfmac opcodes
Differential Revision: https://reviews.llvm.org/D125790
Added:
Modified:
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
llvm/lib/Target/AMDGPU/SIDefines.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index fd5d83b4ed3c5..a9540b14f1ad9 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -148,7 +148,8 @@ DECODE_OPERAND_REG(AReg_1024)
DECODE_OPERAND_REG(AV_32)
DECODE_OPERAND_REG(AV_64)
DECODE_OPERAND_REG(AV_128)
-DECODE_OPERAND_REG(AV_512)
+DECODE_OPERAND_REG(AVDst_128)
+DECODE_OPERAND_REG(AVDst_512)
static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm,
uint64_t Addr,
@@ -972,8 +973,16 @@ MCOperand AMDGPUDisassembler::decodeOperand_AV_128(unsigned Val) const {
return decodeSrcOp(OPW128, Val);
}
-MCOperand AMDGPUDisassembler::decodeOperand_AV_512(unsigned Val) const {
- return decodeSrcOp(OPW512, Val);
+MCOperand AMDGPUDisassembler::decodeOperand_AVDst_128(unsigned Val) const {
+ using namespace AMDGPU::EncValues;
+ assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
+ return decodeSrcOp(OPW128, Val | IS_VGPR);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AVDst_512(unsigned Val) const {
+ using namespace AMDGPU::EncValues;
+ assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
+ return decodeSrcOp(OPW512, Val | IS_VGPR);
}
MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 6e83da88f656e..5e44fecf43c11 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -142,7 +142,8 @@ class AMDGPUDisassembler : public MCDisassembler {
MCOperand decodeOperand_AV_32(unsigned Val) const;
MCOperand decodeOperand_AV_64(unsigned Val) const;
MCOperand decodeOperand_AV_128(unsigned Val) const;
- MCOperand decodeOperand_AV_512(unsigned Val) const;
+ MCOperand decodeOperand_AVDst_128(unsigned Val) const;
+ MCOperand decodeOperand_AVDst_512(unsigned Val) const;
enum OpWidthTy {
OPW32,
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 674fbd20ab16f..3cef4597de4cf 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -280,7 +280,8 @@ enum : unsigned {
INLINE_FLOATING_C_MAX = 248,
LITERAL_CONST = 255,
VGPR_MIN = 256,
- VGPR_MAX = 511
+ VGPR_MAX = 511,
+ IS_VGPR = 256 // Indicates VGPR or AGPR
};
} // namespace EncValues
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index e2aa0143328dd..2cb6ae3c884aa 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1097,7 +1097,7 @@ defm VISrc_512 : RegInlineOperandAC<"VReg", "VISrc_512", "_512">;
defm VISrc_1024 : RegInlineOperandAC<"VReg", "VISrc_1024", "_1024">;
//===----------------------------------------------------------------------===//
-// AVSrc_* Operands with an AGPR or VGPR
+// AVSrc_*, AVDst_*, AVLdSt_* Operands with an AGPR or VGPR
//===----------------------------------------------------------------------===//
def AVSrc_32 : RegisterOperand<AV_32> {
@@ -1115,8 +1115,13 @@ def AVSrc_128 : RegisterOperand<AV_128> {
let EncoderMethod = "getAVOperandEncoding";
}
-def AVSrc_512 : RegisterOperand<AV_512> {
- let DecoderMethod = "DecodeAV_512RegisterClass";
+def AVDst_128 : RegisterOperand<AV_128> {
+ let DecoderMethod = "DecodeAVDst_128RegisterClass";
+ let EncoderMethod = "getAVOperandEncoding";
+}
+
+def AVDst_512 : RegisterOperand<AV_512> {
+ let DecoderMethod = "DecodeAVDst_512RegisterClass";
let EncoderMethod = "getAVOperandEncoding";
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 5599276df86aa..1c670658f06fa 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -449,12 +449,12 @@ def VOPProfileMAI_I32_I64_X32_VCD : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32,
def VOPProfileMAI_F32_V2F32_X16_VCD : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X32_VCD : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>;
-def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32, AVSrc_128, AVSrc_64, AVSrc_128>;
-def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVSrc_512, AVSrc_64, AVSrc_128>;
-def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I32, AVSrc_128, AVSrc_64, AVSrc_128>;
-def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVSrc_512, AVSrc_64, AVSrc_128>;
-def VOPProfileSMFMAC_I32_16X16X64_I8 : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32, AVSrc_128, AVSrc_64, AVSrc_128>;
-def VOPProfileSMFMAC_I32_32X32X32_I8 : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVSrc_512, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32, AVDst_128, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I32, AVDst_128, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_I32_16X16X64_I8 : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_I32_32X32X32_I8 : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>;
class MFMATable <bit is_mac, string Name> {
bit IsMac = is_mac;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 0f74a4310b38f..bb92333304c23 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -373,7 +373,7 @@ class VOP3Pe_MAI <bits<7> op, VOPProfile P, bit acc_cd = 0> : Enc64 {
}
class VOP3Pe_SMFMAC <bits<7> op> : Enc64 {
- bits<10> vdst;
+ bits<10> vdst; // VGPR or AGPR, but not SGPR. vdst{8} is not encoded in the instruction.
bits<10> src0;
bits<10> src1;
bits<9> idx;
@@ -381,7 +381,6 @@ class VOP3Pe_SMFMAC <bits<7> op> : Enc64 {
bits<3> cbsz;
bits<4> abid;
- let vdst{8} = 1; // VGPR or AGPR, but not SGPR
let blgp = 0;
let Inst{7-0} = vdst{7-0};
diff --git a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt
index 9c339157c12ac..bd3a28042027b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt
@@ -69,18 +69,126 @@
# GFX940: v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 ; encoding: [0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14]
0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14
+# GFX940: v_smfmac_f32_16x16x32_f16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x00,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x80,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xe2,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe2,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xe2,0xd3,0x02,0x09,0x0e,0x04
+
# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c]
0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c
# GFX940: v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 ; encoding: [0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14]
0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14
+# GFX940: v_smfmac_f32_32x32x16_f16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x00,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xe4,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe4,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xe4,0xd3,0x02,0x09,0x0e,0x04
+
# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c]
0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c
# GFX940: v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 ; encoding: [0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14]
0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x00,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x80,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xe6,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe6,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xe6,0xd3,0x02,0x09,0x0e,0x04
+
# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c]
0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c
@@ -93,14 +201,122 @@
# GFX940: v_smfmac_f32_32x32x16_bf16 a[10:25], v[2:3], a[4:7], v9 ; encoding: [0x0a,0x80,0xe8,0xd3,0x02,0x09,0x26,0x14]
0x0a,0x80,0xe8,0xd3,0x02,0x09,0x26,0x14
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x00,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x80,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xe8,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe8,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xe8,0xd3,0x02,0x09,0x0e,0x04
+
# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v10 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xea,0xd3,0x02,0x09,0x2a,0x0c]
0x0a,0x0b,0xea,0xd3,0x02,0x09,0x2a,0x0c
# GFX940: v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v11 ; encoding: [0x0a,0x80,0xea,0xd3,0x02,0x09,0x2e,0x14]
0x0a,0x80,0xea,0xd3,0x02,0x09,0x2e,0x14
+# GFX940: v_smfmac_i32_16x16x64_i8 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x00,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0xfc,0x80,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xea,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xea,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xea,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xea,0xd3,0x02,0x09,0x0e,0x04
+
# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v12 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xec,0xd3,0x02,0x09,0x32,0x0c]
0x0a,0x0b,0xec,0xd3,0x02,0x09,0x32,0x0c
# GFX940: v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v13 ; encoding: [0x0a,0x80,0xec,0xd3,0x02,0x09,0x36,0x14]
0x0a,0x80,0xec,0xd3,0x02,0x09,0x36,0x14
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x00,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0xf0,0x80,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x04]
+0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x0c]
+0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x0c
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x04]
+0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x14]
+0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x14
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0x09,0xfe,0x07]
+0x0a,0x00,0xec,0xd3,0x02,0x09,0xfe,0x07
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x02,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x07,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x08,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x38,0xec,0xd3,0x02,0x09,0x0e,0x04
+
+# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xec,0xd3,0x02,0x09,0x0e,0x04]
+0x0a,0x78,0xec,0xd3,0x02,0x09,0x0e,0x04
More information about the llvm-commits mailing list