[llvm-branch-commits] [llvm] AMDGPU: Add minimum3/maximum3 pkf16 for gfx950 encodings (PR #117601)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Nov 25 10:25:40 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
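<summary>Changes</summary>

Adds the `minimum3-maximum3-pkf16` subtarget feature (`FeatureMinimum3Maximum3PKF16`) and includes it in `FeatureGFX950Insts`, together with the `HasMinimum3Maximum3PKF16` predicate and the `GCNSubtarget::hasMinimum3Maximum3PKF16()` accessor. Defines the VOP3P instructions `v_pk_minimum3_f16` (opcode 0x1b) and `v_pk_maximum3_f16` (opcode 0x1c), and adds MC assembler, assembler-error, and disassembler tests for them.
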
---
Full diff: https://github.com/llvm/llvm-project/pull/117601.diff


6 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+12-1) 
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+5) 
- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+8) 
- (modified) llvm/test/MC/AMDGPU/gfx950_asm_features.s (+97) 
- (modified) llvm/test/MC/AMDGPU/gfx950_err.s (+6) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt (+61) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index bda0b85f16a9c4..5cd1501cdb1fa9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -149,6 +149,12 @@ def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16",
   "Has v_minimum3_f16 and v_maximum3_f16 instructions"
 >;
 
+def FeatureMinimum3Maximum3PKF16 : SubtargetFeature<"minimum3-maximum3-pkf16",
+  "HasMinimum3Maximum3PKF16",
+  "true",
+  "Has v_pk_minimum3_f16 and v_pk_maximum3_f16 instructions"
+>;
+
 def FeatureSupportsXNACK : SubtargetFeature<"xnack-support",
   "SupportsXNACK",
   "true",
@@ -432,7 +438,8 @@ def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
    FeatureFP4ConversionScaleInsts,
    FeatureFP6BF6ConversionScaleInsts,
    FeatureF16BF16ToFP6BF6ConversionScaleInsts,
-   FeatureMinimum3Maximum3F32
+   FeatureMinimum3Maximum3F32,
+   FeatureMinimum3Maximum3PKF16
    ]
 >;
 
@@ -2147,6 +2154,10 @@ def HasMinimum3Maximum3F16 :
   Predicate<"Subtarget->hasMinimum3Maximum3F16()">,
   AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>;
 
+def HasMinimum3Maximum3PKF16 :
+  Predicate<"Subtarget->hasMinimum3Maximum3PKF16()">,
+  AssemblerPredicate<(all_of FeatureMinimum3Maximum3PKF16)>;
+
 
 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
   AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index cdc5e1a66afa2c..ea5e159fdd8363 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -250,6 +250,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasAshrPkInsts = false;
   bool HasMinimum3Maximum3F32 = false;
   bool HasMinimum3Maximum3F16 = false;
+  bool HasMinimum3Maximum3PKF16 = false;
 
   bool RequiresCOV6 = false;
 
@@ -1348,6 +1349,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return HasMinimum3Maximum3F16;
   }
 
+  bool hasMinimum3Maximum3PKF16() const {
+    return HasMinimum3Maximum3PKF16;
+  }
+
   /// \returns The maximum number of instructions that can be enclosed in an
   /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
   /// instruction.
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index ee68eb32d9173a..ae5a6581a3b200 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -144,6 +144,11 @@ def : VOP3PSatPat<usubsat, V_PK_SUB_U16>;
 def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
 } // End SubtargetPredicate = HasVOP3PInsts
 
+let SubtargetPredicate = HasMinimum3Maximum3PKF16, FPDPRounding = 1 in {
+defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
+defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
+}
+
 // TODO: Make sure we're doing the right thing with denormals. Note
 // that FMA and MAD will differ.
 multiclass MadFmaMixPats<SDPatternOperator fma_like,
@@ -2050,6 +2055,9 @@ defm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>;
 defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>;
 defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>;
 
+defm V_PK_MINIMUM3_F16 : VOP3P_Real_vi <0x1b>;
+defm V_PK_MAXIMUM3_F16 : VOP3P_Real_vi <0x1c>;
+
 defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>;
 defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>;
 defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>;
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 68d93b4abf5a72..75022d8cf0cdd0 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -1182,3 +1182,100 @@ v_maximum3_f32 v1, v2, s8, v3
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_minimum3_f32 v0, v1, v2, v3           ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
 v_minimum3_f32 v0, v1, v2, v3
+
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4        ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c]
+v_pk_minimum3_f16 v1, v2, v3, v4
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0       ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b]
+v_pk_minimum3_f16 v1, v2, v3, 2.0
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3       ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c]
+v_pk_minimum3_f16 v1, v2, 2.0, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3       ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c]
+v_pk_minimum3_f16 v1, 2.0, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp  ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c]
+v_pk_minimum3_f16 v1, v2, v3, v4 clamp
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s8, v1        ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c]
+v_pk_minimum3_f16 v8, v0, s8, v1
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, v1, s8        ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18]
+v_pk_minimum3_f16 v8, v0, v1, s8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1        ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_minimum3_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1        ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1        ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04]
+v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04]
+v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4        ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c]
+v_pk_maximum3_f16 v1, v2, v3, v4
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0       ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b]
+v_pk_maximum3_f16 v1, v2, v3, 2.0
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3       ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c]
+v_pk_maximum3_f16 v1, v2, 2.0, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3       ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c]
+v_pk_maximum3_f16 v1, 2.0, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp  ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c]
+v_pk_maximum3_f16 v1, v2, v3, v4 clamp
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s8, v1        ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c]
+v_pk_maximum3_f16 v8, v0, s8, v1
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, v1, s8        ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18]
+v_pk_maximum3_f16 v8, v0, v1, s8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1        ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_maximum3_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1        ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1        ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
+v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04]
+v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04]
+v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index 03b651260b2886..c5450e48558bfd 100644
--- a/llvm/test/MC/AMDGPU/gfx950_err.s
+++ b/llvm/test/MC/AMDGPU/gfx950_err.s
@@ -386,3 +386,9 @@ v_minimum3_f32 v0, s1, s2, v3
 
 // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
 v_minimum3_f32 v0, v1, v2, 0xdeadbeef
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_pk_minimum3_f16 v0, s1, s2, v3
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+v_pk_maximum3_f16 v0, s1, s2, v3
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index f7cb738375d224..adb4f78942503e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -881,3 +881,64 @@
 
 # GFX950: v_minimum3_f32 v0, v1, v2, v3           ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
 0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04
+
+
+# GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3       ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c]
+0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c
+
+# GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3       ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c]
+0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c
+
+# GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0       ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b]
+0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b
+
+# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4        ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c]
+0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c
+
+# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp  ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c]
+0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c
+
+# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1        ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
+0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c
+
+# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04]
+0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04
+
+# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04]
+0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04
+
+# GFX950: v_pk_maximum3_f16 v8, v0, s8, v1        ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c]
+0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c
+
+# GFX950: v_pk_maximum3_f16 v8, v0, v1, s8        ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18]
+0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18
+
+# GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3       ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c]
+0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c
+
+# GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3       ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c]
+0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c
+
+# GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0       ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b]
+0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b
+
+# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4        ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c]
+0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c
+
+# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp  ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c]
+0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c
+
+# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1        ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
+0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c
+
+# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04]
+0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04
+
+# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04]
+0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04
+
+# GFX950: v_pk_minimum3_f16 v8, v0, s8, v1        ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c]
+0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c
+
+# GFX950: v_pk_minimum3_f16 v8, v0, v1, s8        ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18]
+0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18

``````````

</details>


https://github.com/llvm/llvm-project/pull/117601


More information about the llvm-branch-commits mailing list