[llvm] AMDGPU: Add encodings for minimum3/maximum3 f32 for gfx950 (PR #117600)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 25 19:55:23 PST 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117600
From 0c90f839f0d931ecacebf89fb32da4468ceb6f1c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 22 May 2024 19:36:47 +0200
Subject: [PATCH] AMDGPU: Add encodings for minimum3/maximum3 f32 for gfx950
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 4 ++-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 3 ++
llvm/test/MC/AMDGPU/gfx950_asm_features.s | 33 +++++++++++++++++++
llvm/test/MC/AMDGPU/gfx950_err.s | 33 +++++++++++++++++++
.../Disassembler/AMDGPU/gfx950_dasm_vop3.txt | 24 ++++++++++++++
5 files changed, 96 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 89a35d911e8b1b..0569f70077df4a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -431,7 +431,9 @@ def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
FeatureBF8ConversionScaleInsts,
FeatureFP4ConversionScaleInsts,
FeatureFP6BF6ConversionScaleInsts,
- FeatureF16BF16ToFP6BF6ConversionScaleInsts]
+ FeatureF16BF16ToFP6BF6ConversionScaleInsts,
+ FeatureMinimum3Maximum3F32
+ ]
>;
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 11700a4c34f9f3..5d4d56e8b0ad22 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1937,6 +1937,9 @@ defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>;
defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
+defm V_MINIMUM3_F32 : VOP3_Real_vi <0x2a8>;
+defm V_MAXIMUM3_F32 : VOP3_Real_vi <0x2a9>;
+
defm V_BITOP3_B16 : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">;
defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
let OtherPredicates = [HasFP8ConversionScaleInsts] in {
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 5aa1e2d5ccb11d..68d93b4abf5a72 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -1149,3 +1149,36 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7]
buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095
+
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04]
+v_maximum3_f32 v1, v2, v3, v4
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, -v2, -v3, -v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4]
+v_maximum3_f32 v1, -v2, -v3, -v4
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, -|v2|, -|v3|, -|v4| ; encoding: [0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4]
+v_maximum3_f32 v1, -|v2|, -|v3|, -|v4|
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, 0, 1.0, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04]
+v_maximum3_f32 v1, 0.0, 1.0, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v2, 0, v3, 1.0 ; encoding: [0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03]
+v_maximum3_f32 v2, 0.0, v3, 1.0
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, s8, v3, 1.0 ; encoding: [0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03]
+v_maximum3_f32 v1, s8, v3, 1.0
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_maximum3_f32 v1, v2, s8, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04]
+v_maximum3_f32 v1, v2, s8, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
+v_minimum3_f32 v0, v1, v2, v3
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index fd3da56c5130c4..03b651260b2886 100644
--- a/llvm/test/MC/AMDGPU/gfx950_err.s
+++ b/llvm/test/MC/AMDGPU/gfx950_err.s
@@ -353,3 +353,36 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 dlc
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc slc dlc
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_maximum3_f16 v0, v1, v2, v3
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_minimum3_f16 v0, v1, v2, v3
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_maximum_f16 v0, v1, v2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_minimum_f16 v0, v1, v2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_maximum_f32 v0, v1, v2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_minimum_f32 v0, v1, v2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_maximum3_f32 v0, s1, s2, v3
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_maximum3_f32 v0, v3, s1, s2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_maximum3_f32 v0, s1, v3, s2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_minimum3_f32 v0, s1, s2, v3
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
+v_minimum3_f32 v0, v1, v2, 0xdeadbeef
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index ca8b1750a579e2..f7cb738375d224 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -857,3 +857,27 @@
# GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02]
0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02
+
+# GFX950: v_maximum3_f32 v1, -v2, -v3, -v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4]
+0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4
+
+# GFX950: v_maximum3_f32 v1, -|v2|, -|v3|, -|v4| ; encoding: [0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4]
+0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4
+
+# GFX950: v_maximum3_f32 v1, 0, 1.0, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04]
+0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04
+
+# GFX950: v_maximum3_f32 v1, s8, v3, 1.0 ; encoding: [0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03]
+0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03
+
+# GFX950: v_maximum3_f32 v1, v2, s8, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04]
+0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04
+
+# GFX950: v_maximum3_f32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04]
+0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04
+
+# GFX950: v_maximum3_f32 v2, 0, v3, 1.0 ; encoding: [0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03]
+0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03
+
+# GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
+0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04
More information about the llvm-commits mailing list