[llvm-branch-commits] [llvm] AMDGPU: Add minimum3/maximum3 pkf16 for gfx950 encodings (PR #117601)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Nov 25 10:15:50 PST 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/117601
(No description was provided for this pull request.)
From d6a1de80431f1b8db2da27d5fa66f9d3233962cd Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 22 May 2024 19:45:57 +0200
Subject: [PATCH] AMDGPU: Add minimum3/maximum3 pkf16 for gfx950 encodings
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 13 ++-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 5 +
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 8 ++
llvm/test/MC/AMDGPU/gfx950_asm_features.s | 97 +++++++++++++++++++
llvm/test/MC/AMDGPU/gfx950_err.s | 6 ++
.../Disassembler/AMDGPU/gfx950_dasm_vop3.txt | 61 ++++++++++++
6 files changed, 189 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index bda0b85f16a9c4..5cd1501cdb1fa9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -149,6 +149,12 @@ def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16",
"Has v_minimum3_f16 and v_maximum3_f16 instructions"
>;
+def FeatureMinimum3Maximum3PKF16 : SubtargetFeature<"minimum3-maximum3-pkf16",
+ "HasMinimum3Maximum3PKF16",
+ "true",
+ "Has v_pk_minimum3_f16 and v_pk_maximum3_f16 instructions"
+>;
+
def FeatureSupportsXNACK : SubtargetFeature<"xnack-support",
"SupportsXNACK",
"true",
@@ -432,7 +438,8 @@ def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
FeatureFP4ConversionScaleInsts,
FeatureFP6BF6ConversionScaleInsts,
FeatureF16BF16ToFP6BF6ConversionScaleInsts,
- FeatureMinimum3Maximum3F32
+ FeatureMinimum3Maximum3F32,
+ FeatureMinimum3Maximum3PKF16
]
>;
@@ -2147,6 +2154,10 @@ def HasMinimum3Maximum3F16 :
Predicate<"Subtarget->hasMinimum3Maximum3F16()">,
AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>;
+def HasMinimum3Maximum3PKF16 :
+ Predicate<"Subtarget->hasMinimum3Maximum3PKF16()">,
+ AssemblerPredicate<(all_of FeatureMinimum3Maximum3PKF16)>;
+
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index cdc5e1a66afa2c..ea5e159fdd8363 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -250,6 +250,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAshrPkInsts = false;
bool HasMinimum3Maximum3F32 = false;
bool HasMinimum3Maximum3F16 = false;
+ bool HasMinimum3Maximum3PKF16 = false;
bool RequiresCOV6 = false;
@@ -1348,6 +1349,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return HasMinimum3Maximum3F16;
}
+ bool hasMinimum3Maximum3PKF16() const {
+ return HasMinimum3Maximum3PKF16;
+ }
+
/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
/// instruction.
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index ee68eb32d9173a..ae5a6581a3b200 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -144,6 +144,11 @@ def : VOP3PSatPat<usubsat, V_PK_SUB_U16>;
def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
} // End SubtargetPredicate = HasVOP3PInsts
+let SubtargetPredicate = HasMinimum3Maximum3PKF16, FPDPRounding = 1 in {
+defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
+defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
+}
+
// TODO: Make sure we're doing the right thing with denormals. Note
// that FMA and MAD will differ.
multiclass MadFmaMixPats<SDPatternOperator fma_like,
@@ -2050,6 +2055,9 @@ defm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>;
defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>;
defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>;
+defm V_PK_MINIMUM3_F16 : VOP3P_Real_vi <0x1b>;
+defm V_PK_MAXIMUM3_F16 : VOP3P_Real_vi <0x1c>;
+
defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>;
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>;
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 68d93b4abf5a72..75022d8cf0cdd0 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -1182,3 +1182,100 @@ v_maximum3_f32 v1, v2, s8, v3
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
v_minimum3_f32 v0, v1, v2, v3
+
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c]
+v_pk_minimum3_f16 v1, v2, v3, v4
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b]
+v_pk_minimum3_f16 v1, v2, v3, 2.0
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c]
+v_pk_minimum3_f16 v1, v2, 2.0, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c]
+v_pk_minimum3_f16 v1, 2.0, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c]
+v_pk_minimum3_f16 v1, v2, v3, v4 clamp
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c]
+v_pk_minimum3_f16 v8, v0, s8, v1
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18]
+v_pk_minimum3_f16 v8, v0, v1, s8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_minimum3_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04]
+v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04]
+v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c]
+v_pk_maximum3_f16 v1, v2, v3, v4
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b]
+v_pk_maximum3_f16 v1, v2, v3, 2.0
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c]
+v_pk_maximum3_f16 v1, v2, 2.0, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c]
+v_pk_maximum3_f16 v1, 2.0, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c]
+v_pk_maximum3_f16 v1, v2, v3, v4 clamp
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c]
+v_pk_maximum3_f16 v8, v0, s8, v1
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18]
+v_pk_maximum3_f16 v8, v0, v1, s8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_maximum3_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04]
+v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04]
+v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index 03b651260b2886..c5450e48558bfd 100644
--- a/llvm/test/MC/AMDGPU/gfx950_err.s
+++ b/llvm/test/MC/AMDGPU/gfx950_err.s
@@ -386,3 +386,9 @@ v_minimum3_f32 v0, s1, s2, v3
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
v_minimum3_f32 v0, v1, v2, 0xdeadbeef
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_pk_minimum3_f16 v0, s1, s2, v3
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_pk_maximum3_f16 v0, s1, s2, v3
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index f7cb738375d224..adb4f78942503e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -881,3 +881,64 @@
# GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04
+
+
+# GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c]
+0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c
+
+# GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c]
+0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c
+
+# GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b]
+0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b
+
+# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c]
+0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c
+
+# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c]
+0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c
+
+# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
+0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c
+
+# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04]
+0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04
+
+# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04]
+0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04
+
+# GFX950: v_pk_maximum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c]
+0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c
+
+# GFX950: v_pk_maximum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18]
+0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18
+
+# GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c]
+0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c
+
+# GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c]
+0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c
+
+# GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b]
+0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b
+
+# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c]
+0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c
+
+# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c]
+0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c
+
+# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
+0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c
+
+# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04]
+0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04
+
+# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04]
+0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04
+
+# GFX950: v_pk_minimum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c]
+0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c
+
+# GFX950: v_pk_minimum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18]
+0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18
More information about the llvm-branch-commits mailing list