[llvm] [AMDGPU] Allow dpp in v_pk_fmac_f16 for GFX9 and GFX10 (PR #144782)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 18 12:05:24 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jun Wang (jwanggit86)
<details>
<summary>Changes</summary>
Allows dpp in v_pk_fmac-f16 for GFX9, and both dpp and dpp8 for GFX10.
---
Full diff: https://github.com/llvm/llvm-project/pull/144782.diff
6 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+8)
- (modified) llvm/test/MC/AMDGPU/gfx10_asm_vop2.s (+12)
- (added) llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s (+13)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt (+7)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt (+3)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt (+10)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 0c7e20fc1ebf3..c459c4df11e9e 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -2172,6 +2172,7 @@ defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
let IsSingle = 1 in {
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
}
+defm V_PK_FMAC_F16 : VOP2_Real_dpp_gfx10<0x03c>, VOP2_Real_dpp8_gfx10<0x03c>;
// VOP2 no carry-in, carry-out.
defm V_ADD_NC_U32 :
@@ -2504,6 +2505,11 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}
+ multiclass VOP2_Real_dpp_gfx9<bits<6> op> {
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
+ def _dpp_gfx9 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>;
+ }
+
} // End DecoderNamespace = "GFX9"
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
@@ -2560,6 +2566,8 @@ defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_s
defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;
+
+defm V_PK_FMAC_F16 : VOP2_Real_dpp_gfx9<0x03c>;
} // End AssemblerPredicate = isGFX9Only
defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
index 3dcf288bbbaa5..bbd36a9140b96 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
@@ -13185,3 +13185,15 @@ v_pk_fmac_f16 v5, -4.0, v2
v_pk_fmac_f16 v5, v1, v255
// GFX10: encoding: [0x01,0xff,0x0b,0x78]
+
+v_pk_fmac_f16 v5, v1, v2
+// GFX10: encoding: [0x01,0x05,0x0a,0x78]
+
+v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff]
+
+v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00]
+
+v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX10: encoding: [0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s
new file mode 100644
index 0000000000000..f7dab2d0359dc
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx90a -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s
+
+v_pk_fmac_f16 v5, v1, v2
+// CHECK-MI: [0x01,0x05,0x0a,0x78]
+
+v_pk_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3]
+// CHECK-MI: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff]
+
+v_pk_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK-MI: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt
index 1774efa4a65c7..4a7471e6c6f98 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt
@@ -2476,3 +2476,10 @@
# W32: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff]
# W64: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff]
0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff
+
+# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff]
+0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff
+
+# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00]
+0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00
+
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt
index 40b8f24e4d72f..233f93a5b8e7d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt
@@ -222,3 +222,6 @@
# W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa]
# W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa]
0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa
+
+# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05]
+0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt
new file mode 100644
index 0000000000000..ac1ef4baa9aa4
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx908 -disassemble -show-encoding < %s | FileCheck -check-prefix=CHECK-MI %s
+
+# CHECK-MI: v_pk_fmac_f16_e32 v5, v1, v2
+0x01,0x05,0x0a,0x78
+
+# CHECK-MI: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff
+
+# CHECK-MI: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00
``````````
</details>
https://github.com/llvm/llvm-project/pull/144782
More information about the llvm-commits
mailing list