[llvm] Main merge true16 vop3 mc more instruction 10 (PR #120600)

Brox Chen via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 19 08:26:51 PST 2024


https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/120600

>From a92fed39dd7025ee46753441d475694eb28f1d6a Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Thu, 19 Dec 2024 11:25:44 -0500
Subject: [PATCH] added missing v_med3_i/u16 fake16 pattern

---
 llvm/lib/Target/AMDGPU/SIInstructions.td | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 532df39e82a75a..41c0a565001a9a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3645,13 +3645,15 @@ multiclass FPMed3Pat<ValueType vt,
 
 multiclass Int16Med3Pat<Instruction med3Inst,
                         SDPatternOperator min,
-                        SDPatternOperator max> {
+                        SDPatternOperator max,
+                        RegisterOperand outputSrcType> {
   // This matches 16 permutations of
   // max(min(x, y), min(max(x, y), z))
   def : GCNPat <
   (max (min i16:$src0, i16:$src1),
        (min (max i16:$src0, i16:$src1), i16:$src2)),
-  (med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE)
+  (med3Inst SRCMODS.NONE, outputSrcType:$src0, SRCMODS.NONE, outputSrcType:$src1,
+            SRCMODS.NONE, outputSrcType:$src2, DSTCLAMP.NONE)
 >;
 
   // This matches 16 permutations of
@@ -3719,10 +3721,16 @@ def : FPMinCanonMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_
 def : FPMinCanonMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
 }
 
-let OtherPredicates = [isGFX9Plus] in {
-defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax>;
-defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax>;
-} // End Predicates = [isGFX9Plus]
+let SubtargetPredicate = isGFX9Plus in {
+let True16Predicate = NotHasTrue16BitInsts in {
+  defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax, VSrc_b16>;
+  defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax, VSrc_b16>;
+}
+let True16Predicate = UseFakeTrue16Insts in {
+  defm : Int16Med3Pat<V_MED3_I16_fake16_e64, smin, smax, VSrc_b16>;
+  defm : Int16Med3Pat<V_MED3_U16_fake16_e64, umin, umax, VSrc_b16>;
+}
+} // End SubtargetPredicate = isGFX9Plus
 
 let OtherPredicates = [isGFX12Plus] in {
 def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;



More information about the llvm-commits mailing list