[llvm] [AMDGPU][MC] In GFX11+ v_pk_fmac_f16 should not allow DPP (PR #148751)
Jun Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 30 12:03:15 PDT 2025
https://github.com/jwanggit86 updated https://github.com/llvm/llvm-project/pull/148751
>From 3a8b4087233b698dba968d16bc99f2ae894eed16 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Mon, 14 Jul 2025 17:08:07 -0700
Subject: [PATCH 1/3] [AMDGPU][MC] In GFX11+ v_pk_fmac_f16 should not allow DPP
In GFX11+ the instruction v_pk_fmac_f16 should not allow DPP.
---
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +-
.../MC/AMDGPU/gfx11_asm_vop2_fake16_err.s | 15 ++++++++++++++
llvm/test/MC/AMDGPU/gfx12_asm_vop2_err.s | 20 +++++++++++++++++++
3 files changed, 36 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop2_err.s
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 550ec9d3f55ab..cd7e399f65c55 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1904,7 +1904,7 @@ multiclass VOP2_Real_FULL_with_name_gfx11_gfx12<bits<6> op, string opName,
VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
multiclass VOP2_Real_e32_gfx11_gfx12<bits<6> op> :
- VOP2Only_Real<GFX11Gen, op>, VOP2Only_Real<GFX12Gen, op>;
+ VOP2Only_Real_e32<GFX11Gen, op>, VOP2Only_Real_e32<GFX12Gen, op>;
multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_fake16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_fake16_err.s
index bc0f586e1d411..f586e4a33c520 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_fake16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_fake16_err.s
@@ -173,6 +173,21 @@ v_mul_f16_e32 v5, v1, v255
v_mul_f16_e32 v5, v255, v2
// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+v_pk_fmac_f16 v0, v1, v2 quad_perm:[1,2,3,0]
+// GFX11: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16 v0, v1, v2 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0
+// GFX11: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16_dpp v0, v1, v2 quad_perm:[1,2,3,0]
+// GFX11: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported
+
+v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported
+
v_sub_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_err.s
new file mode 100644
index 0000000000000..b7d93e1a3b3c8
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_err.s
@@ -0,0 +1,20 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --sort --version 5
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error: %s
+
+v_pk_fmac_f16 v0, v1, v2 quad_perm:[1,2,3,0]
+// GFX12: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16 v0, v1, v2 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0
+// GFX12: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:26: error: not a valid operand.
+
+v_pk_fmac_f16_dpp v0, v1, v2 quad_perm:[1,2,3,0]
+// GFX12: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported
+
+v_pk_fmac_f16_dpp v0, v1, v2 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0
+// GFX12: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported
+
+v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:1: error: dpp variant of this instruction is not supported
>From f57fcbbee679126961f8be7a3a8e347815ee9e9c Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Tue, 29 Jul 2025 13:29:57 -0700
Subject: [PATCH 2/3] Add comments.
---
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index cd7e399f65c55..330f556d9290e 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1966,6 +1966,8 @@ defm V_SUBREV_CO_CI_U32 :
defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11_gfx12<0x02f,
"V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
+// FIXME: V_PK_FMAC_F16 is currently not used in instruction selection.
+// If this changes, ensure the DPP variant is not used for GFX11+.
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx11_gfx12<0x03c>;
defm V_ADD_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x032, "v_add_f16">;
>From 7eab2264557e7165b7dc3bccee159cad81d1d19e Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Wed, 30 Jul 2025 12:02:16 -0700
Subject: [PATCH 3/3] Move comments to pseudo instruction
---
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 330f556d9290e..9de7d6d009fe1 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1344,6 +1344,8 @@ def V_FMAAK_F64 : VOP2_Pseudo<"v_fmaak_f64", VOP_MADAK_F64, [], "">;
} // End SubtargetPredicate = HasFmaakFmamkF64Insts, isReMaterializable = 1, FixedSize = 1, Size = 12, SchedRW = [Write64Bit]
let SubtargetPredicate = HasPkFmacF16Inst in {
+// FIXME: V_PK_FMAC_F16 is currently not used in instruction selection.
+// If this changes, ensure the DPP variant is not used for GFX11+.
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
} // End SubtargetPredicate = HasPkFmacF16Inst
@@ -1966,8 +1968,6 @@ defm V_SUBREV_CO_CI_U32 :
defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11_gfx12<0x02f,
"V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
-// FIXME: V_PK_FMAC_F16 is currently not used in instruction selection.
-// If this changes, ensure the DPP variant is not used for GFX11+.
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx11_gfx12<0x03c>;
defm V_ADD_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x032, "v_add_f16">;
More information about the llvm-commits
mailing list