[llvm] [AMDGPU][True16][MC] Support v_swap_b16. (PR #100442)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 24 12:19:39 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
Author: Brox Chen (broxigarchen)
<details>
<summary>Changes</summary>
Added v_swap_b16 support to the AMDGPU MC layer (assembler and disassembler).
---
Full diff: https://github.com/llvm/llvm-project/pull/100442.diff
7 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrFormats.td (+6)
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+17-1)
- (modified) llvm/test/MC/AMDGPU/gfx10_unsupported.s (+3)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1.s (+9)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/decode-err.txt (+9)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt (+6)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 1fe8beafd5e5d..9b506eb0a711a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -321,6 +321,12 @@ def VOPDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
}
+// Source-encoded destination operand for instructions like v_swap_b16.
+def VOPSrcEncodedDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
+ let EncoderMethod = VSrcT_b16_Lo128.EncoderMethod;
+ let DecoderMethod = VSrcT_b16_Lo128.DecoderMethod;
+}
+
class VINTRPe <bits<2> op> : Enc32 {
bits<8> vdst;
bits<8> vsrc;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 2c0d61ee4afa1..c08903d03939c 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -729,7 +729,22 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1
let isAsCheapAsAMove = 1;
}
+def VOP_SWAP_I16 : VOPProfile_True16<VOP_I16_I16> {
+ let Outs32 = (outs VOPDstOperand_t16Lo128:$vdst,
+ VOPSrcEncodedDstOperand_t16Lo128:$vdst1);
+ let Ins32 = (ins VOPSrcEncodedDstOperand_t16Lo128:$src0,
+ VOPDstOperand_t16Lo128:$src1);
+ let Asm32 = " $vdst, $src0";
+}
+
let SubtargetPredicate = isGFX11Plus in {
+ def V_SWAP_B16 : VOP1_Pseudo<"v_swap_b16", VOP_SWAP_I16, [], /* VOP1Only= */ 1> {
+ let Constraints = "$vdst = $src1, $vdst1 = $src0";
+ let DisableEncoding = "$vdst1, $src1";
+ let SchedRW = [Write64Bit, Write64Bit];
+ }
+ // TODO-GFX11 select new insts
+ defm V_MOV_B16_t16 : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
// Restrict src0 to be VGPR
def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
[], /*VOP1Only=*/ 1> {
@@ -952,7 +967,8 @@ defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03a,
"V_FFBL_B32", "v_ctz_i32_b32">;
defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
"V_FFBH_I32", "v_cls_i32">;
-defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
+defm V_SWAP_B16 : VOP1Only_Real_gfx11_gfx12<0x066>;
+defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
diff --git a/llvm/test/MC/AMDGPU/gfx10_unsupported.s b/llvm/test/MC/AMDGPU/gfx10_unsupported.s
index 46b4e6ffb4037..1374417ac354b 100644
--- a/llvm/test/MC/AMDGPU/gfx10_unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx10_unsupported.s
@@ -3287,6 +3287,9 @@ v_subrev_u32_e64 v255, s[12:13], v1, v2
v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_swap_b16 v0.l, v0.l
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
v_wmma_bf16_16x16x16_bf16 v[16:19], 1.0, v[8:15], v[16:19]
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
index d95ef6f15e48d..90d5ca7f72751 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
@@ -3448,6 +3448,15 @@ v_sqrt_f64 v[5:6], src_scc
v_sqrt_f64 v[254:255], 0xaf123456
// GFX11: encoding: [0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf]
+v_swap_b16 v5.l, v1.h
+// GFX11: encoding: [0x81,0xcd,0x0a,0x7e]
+
+v_swap_b16 v5.h, v1.l
+// GFX11: encoding: [0x01,0xcd,0x0a,0x7f]
+
+v_swap_b16 v127.l, v127.l
+// GFX11: encoding: [0x7f,0xcd,0xfe,0x7e]
+
v_swap_b32 v5, v1
// GFX11: encoding: [0x01,0xcb,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s
index 5b5381b752feb..ab587a524fc6e 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s
@@ -211,6 +211,18 @@ v_sqrt_f16_e32 v255.l, v1.l
v_sqrt_f16_e32 v5.l, v199.l
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_swap_b16_e32 v128.l, v0.l
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swap_b16_e32 v0.l, s0
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swap_b16_e32 v0.l, 0
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swap_b16_e32 v0.l, 0xfe0b
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
v_trunc_f16_e32 v128, 0xfe0b
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
index f6d2a19326e1d..c15b65728b247 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
@@ -19,6 +19,15 @@
# W64: [[@LINE+1]]:1: warning: invalid instruction encoding
0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf
+# GFX11: v_swap_b16 v5.h, s1/*Invalid register, operand has 'VGPR_16_Lo128' register class*/ ; encoding: [0x01,0xcc,0x0a,0x7f]
+0x01,0xcc,0x0a,0x7f
+
+# GFX11: v_swap_b16 v5.h, 0x3c00/*Invalid immediate*/ ; encoding: [0x00,0xcc,0x0a,0x7f]
+0xf2,0xcc,0x0a,0x7f
+
+# GFX11: v_swap_b16 v5.h, 0x78563412/*Invalid immediate*/ ; encoding: [0x12,0xcc,0x0a,0x7f]
+0xff,0xcc,0x0a,0x7f,0x12,0x34,0x56,0x78
+
# W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
# W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
index b176a57d70f86..778f7deb4ec1a 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
@@ -3344,6 +3344,12 @@
# GFX11: v_sqrt_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf]
0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf
+# GFX11: v_swap_b16 v5.l, v1.h ; encoding: [0x81,0xcd,0x0a,0x7e]
+0x81,0xcd,0x0a,0x7e
+
+# GFX11: v_swap_b16 v5.h, v1.l ; encoding: [0x01,0xcd,0x0a,0x7f]
+0x01,0xcd,0x0a,0x7f
+
# GFX11: v_swap_b32 v5, v1 ; encoding: [0x01,0xcb,0x0a,0x7e]
0x01,0xcb,0x0a,0x7e
``````````
</details>
https://github.com/llvm/llvm-project/pull/100442
More information about the llvm-commits mailing list