[llvm] 641ad52 - [AMDGPU][MC] Fix disassembly for v_permlane16_swap_b32 for GFX950 (#146600)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 2 10:05:29 PDT 2025
Author: Jun Wang
Date: 2025-07-02T10:05:25-07:00
New Revision: 641ad52b6ab4ea80c16becffb79e249fc5039f18
URL: https://github.com/llvm/llvm-project/commit/641ad52b6ab4ea80c16becffb79e249fc5039f18
DIFF: https://github.com/llvm/llvm-project/commit/641ad52b6ab4ea80c16becffb79e249fc5039f18.diff
LOG: [AMDGPU][MC] Fix disassembly for v_permlane16_swap_b32 for GFX950 (#146600)
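When targeting GFX950, disassembling the v_permlane16_swap_b32 and
v_permlane32_swap_b32 instructions produces errors for certain vdst
operand values, e.g., v_permlane16_swap_b32 v218, v219. This patch fixes
the problem by declaring the $vdst_in operand of VOP_PERMLANE_SWAP with
the destination register class (DstRC in Ins32, DstRC64 in Ins64)
instead of Src0RC64, and adds assembler and disassembler tests that use
v218 and v219.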
Added:
Modified:
llvm/lib/Target/AMDGPU/VOP1Instructions.td
llvm/test/MC/AMDGPU/gfx950_asm_features.s
llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index cf02c5b2454b3..0dacd9df71305 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -401,8 +401,8 @@ def VOP_PERMLANE_SWAP : VOPProfile<[i32, i32, untyped, untyped]> {
let HasExtDPP = 0;
let HasExtSDWA = 0;
- let Ins32 = (ins Src0RC64:$vdst_in, Src0RC32:$src0);
- let Ins64 = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
+ let Ins32 = (ins DstRC:$vdst_in, Src0RC32:$src0);
+ let Ins64 = (ins DstRC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
let InsVOP3OpSel = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
let Asm64 = "$vdst, $src0$bound_ctrl$fi";
}
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 7bc47914f40b7..57fc573b37ba9 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -40,14 +40,26 @@ global_load_lds_dwordx4 v2, s[4:5] offset:4
// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
v_permlane16_swap_b32 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
+v_permlane16_swap_b32 v218, v219
+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
v_permlane16_swap_b32_e32 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
+v_permlane16_swap_b32_e32 v218, v219
+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
v_permlane16_swap_b32_e64 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane16_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
+v_permlane16_swap_b32_e64 v218, v219
+
// FIXME: Parsed as bound_ctrl:1?
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00]
@@ -81,14 +93,26 @@ v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1
// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
v_permlane32_swap_b32 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
+v_permlane32_swap_b32 v218, v219
+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
v_permlane32_swap_b32_e32 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
+v_permlane32_swap_b32_e32 v218, v219
+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
v_permlane32_swap_b32_e64 v1, v2
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_permlane32_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
+v_permlane32_swap_b32_e64 v218, v219
+
// FIXME: Parsed as bound_ctrl:1?
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
index 9fc9c58387b90..01821593b0707 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt
@@ -47,9 +47,27 @@
# GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
0x02,0xb3,0x02,0x7e
+# GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
+0xdb,0xb3,0xb4,0x7f
+
+# GFX950: v_permlane16_swap_b32_e32 v218, v2 ; encoding: [0x02,0xb3,0xb4,0x7f]
+0x02,0xb3,0xb4,0x7f
+
+# GFX950: v_permlane16_swap_b32_e32 v2, v219 ; encoding: [0xdb,0xb3,0x04,0x7e]
+0xdb,0xb3,0x04,0x7e
+
# GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00
+# GFX950: v_permlane16_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
+0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00
+
+# GFX950: v_permlane16_swap_b32_e64 v218, v2 ; encoding: [0xda,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
+0xda,0x00,0x99,0xd1,0x02,0x01,0x00,0x00
+
+# GFX950: v_permlane16_swap_b32_e64 v2, v219 ; encoding: [0x02,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
+0x02,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00
+
# GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00]
0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00
@@ -63,9 +81,27 @@
# GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
0x02,0xb5,0x02,0x7e
+# GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
+0xdb,0xb5,0xb4,0x7f
+
+# GFX950: v_permlane32_swap_b32_e32 v218, v2 ; encoding: [0x02,0xb5,0xb4,0x7f]
+0x02,0xb5,0xb4,0x7f
+
+# GFX950: v_permlane32_swap_b32_e32 v2, v219 ; encoding: [0xdb,0xb5,0x04,0x7e]
+0xdb,0xb5,0x04,0x7e
+
# GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00
+# GFX950: v_permlane32_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
+0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00
+
+# GFX950: v_permlane32_swap_b32_e64 v218, v2 ; encoding: [0xda,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
+0xda,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00
+
+# GFX950: v_permlane32_swap_b32_e64 v2, v219 ; encoding: [0x02,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
+0x02,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00
+
# GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00]
0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00
More information about the llvm-commits mailing list