[llvm] [AMDGPU] Simplify OtherPredicates handling in MadFmaMixPats. NFC. (PR #127044)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 13 03:25:31 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

<details>
<summary>Changes</summary>

This removes some of the complexity added by ad6cd7e8b259 by setting
OtherPredicates outside MadFmaMixPats rather than inside it.


---
Full diff: https://github.com/llvm/llvm-project/pull/127044.diff


1 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+8-11) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 5e825e7259a95..21898da1912f5 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -154,12 +154,10 @@ defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_
 multiclass MadFmaMixPats<SDPatternOperator fma_like,
                          Instruction mix_inst,
                          Instruction mixlo_inst,
-                         Instruction mixhi_inst,
-                         bit HasFP32Denormals> {
+                         Instruction mixhi_inst> {
   // At least one of the operands needs to be an fpextend of an f16
   // for this to be worthwhile, so we need three patterns here.
   // TODO: Could we use a predicate to inspect src1/2/3 instead?
-  let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]) in {
   def : GCNPat <
     (f32 (fma_like (f32 (VOP3PMadMixModsExt f16:$src0, i32:$src0_mods)),
                    (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_mods)),
@@ -228,13 +226,12 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
                 DSTCLAMP.NONE,
                 (i32 (IMPLICIT_DEF)))
   >;
-  } // End OtherPredicates
 
   // FIXME: Special case handling for maxhi (especially for clamp)
   // because dealing with the write to high half of the register is
   // difficult.
   foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-  let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]), True16Predicate = p in {
+  let True16Predicate = p in {
 
   def : GCNPat <
     (build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
@@ -260,9 +257,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
                        VGPR_32:$elt0))
   >;
 
-  } // end OtherPredicates
+  } // end True16Predicate
 
-  let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]), True16Predicate = UseRealTrue16Insts in {
+  let True16Predicate = UseRealTrue16Insts in {
   def : GCNPat <
     (build_vector (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                             (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
@@ -297,7 +294,7 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
                        DSTCLAMP.ENABLE,
                        (REG_SEQUENCE VGPR_32, $elt0, lo16, (f16 (IMPLICIT_DEF)), hi16)))
   >;
-  } // end OtherPredicates
+  } // end True16Predicate
 }
 
 class MinimumMaximumByMinimum3Maximum3VOP3P<SDPatternOperator node,
@@ -330,9 +327,9 @@ defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F
 }
 } // End FPDPRounding = 1
 }
-} // OtherPredicates = [NoFP32Denormals]
 
-defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16, 0 /*HasFP32Denormals*/>;
+defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
+} // OtherPredicates = [NoFP32Denormals]
 } // End SubtargetPredicate = HasMadMixInsts
 
 
@@ -353,7 +350,7 @@ defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F
 } // End FPDPRounding = 1
 }
 
-defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16, 1 /*HasPF32Denormals*/>;
+defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
 }
 
 // Defines patterns that extract signed 4bit from each Idx[0].

``````````

</details>


https://github.com/llvm/llvm-project/pull/127044


More information about the llvm-commits mailing list