[llvm] AMDGPU: Add encodings for minimum3/maximum3 f32 for gfx950 (PR #117600)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 25 19:55:23 PST 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117600

From 0c90f839f0d931ecacebf89fb32da4468ceb6f1c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 22 May 2024 19:36:47 +0200
Subject: [PATCH] AMDGPU: Add encodings for minimum3/maximum3 f32 for gfx950

---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |  4 ++-
 llvm/lib/Target/AMDGPU/VOP3Instructions.td    |  3 ++
 llvm/test/MC/AMDGPU/gfx950_asm_features.s     | 33 +++++++++++++++++++
 llvm/test/MC/AMDGPU/gfx950_err.s              | 33 +++++++++++++++++++
 .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt  | 24 ++++++++++++++
 5 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 89a35d911e8b1b..0569f70077df4a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -431,7 +431,9 @@ def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
    FeatureBF8ConversionScaleInsts,
    FeatureFP4ConversionScaleInsts,
    FeatureFP6BF6ConversionScaleInsts,
-   FeatureF16BF16ToFP6BF6ConversionScaleInsts]
+   FeatureF16BF16ToFP6BF6ConversionScaleInsts,
+   FeatureMinimum3Maximum3F32
+   ]
 >;
 
 def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 11700a4c34f9f3..5d4d56e8b0ad22 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1937,6 +1937,9 @@ defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>;
 defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
 defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
 
+defm V_MINIMUM3_F32 : VOP3_Real_vi <0x2a8>;
+defm V_MAXIMUM3_F32 : VOP3_Real_vi <0x2a9>;
+
 defm V_BITOP3_B16         : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">;
 defm V_BITOP3_B32         : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
 let OtherPredicates = [HasFP8ConversionScaleInsts] in {
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 5aa1e2d5ccb11d..68d93b4abf5a72 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -1149,3 +1149,36 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7]
 buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095
+
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, v2, v3, v4           ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04]
+v_maximum3_f32 v1, v2, v3, v4
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, -v2, -v3, -v4        ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4]
+v_maximum3_f32 v1, -v2, -v3, -v4
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, -|v2|, -|v3|, -|v4|  ; encoding: [0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4]
+v_maximum3_f32 v1, -|v2|, -|v3|, -|v4|
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, 0, 1.0, v3           ; encoding: [0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04]
+v_maximum3_f32 v1, 0.0, 1.0, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v2, 0, v3, 1.0           ; encoding: [0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03]
+v_maximum3_f32 v2, 0.0, v3, 1.0
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, s8, v3, 1.0          ; encoding: [0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03]
+v_maximum3_f32 v1, s8, v3, 1.0
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, v2, s8, v3           ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04]
+v_maximum3_f32 v1, v2, s8, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_minimum3_f32 v0, v1, v2, v3           ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
+v_minimum3_f32 v0, v1, v2, v3
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index fd3da56c5130c4..03b651260b2886 100644
--- a/llvm/test/MC/AMDGPU/gfx950_err.s
+++ b/llvm/test/MC/AMDGPU/gfx950_err.s
@@ -353,3 +353,36 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 dlc
 
 // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc slc dlc
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_maximum3_f16 v0, v1, v2, v3
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_minimum3_f16 v0, v1, v2, v3
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_maximum_f16 v0, v1, v2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_minimum_f16 v0, v1, v2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_maximum_f32 v0, v1, v2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_minimum_f32 v0, v1, v2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_maximum3_f32 v0, s1, s2, v3
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_maximum3_f32 v0, v3, s1, s2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_maximum3_f32 v0, s1, v3, s2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_minimum3_f32 v0, s1, s2, v3
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
+v_minimum3_f32 v0, v1, v2, 0xdeadbeef
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index ca8b1750a579e2..f7cb738375d224 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -857,3 +857,27 @@
 
 # GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02]
 0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02
+
+# GFX950: v_maximum3_f32 v1, -v2, -v3, -v4        ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4]
+0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4
+
+# GFX950: v_maximum3_f32 v1, -|v2|, -|v3|, -|v4|  ; encoding: [0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4]
+0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4
+
+# GFX950: v_maximum3_f32 v1, 0, 1.0, v3           ; encoding: [0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04]
+0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04
+
+# GFX950: v_maximum3_f32 v1, s8, v3, 1.0          ; encoding: [0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03]
+0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03
+
+# GFX950: v_maximum3_f32 v1, v2, s8, v3           ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04]
+0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04
+
+# GFX950: v_maximum3_f32 v1, v2, v3, v4           ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04]
+0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04
+
+# GFX950: v_maximum3_f32 v2, 0, v3, 1.0           ; encoding: [0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03]
+0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03
+
+# GFX950: v_minimum3_f32 v0, v1, v2, v3           ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04



More information about the llvm-commits mailing list