[llvm] [AMDGPU] Simplify OtherPredicates handling in MadFmaMixPats. NFC. (PR #127044)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 13 03:25:31 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
This removes some of the complexity added by ad6cd7e8b259 by setting
OtherPredicates outside MadFmaMixPats rather than inside it.
---
Full diff: https://github.com/llvm/llvm-project/pull/127044.diff
1 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+8-11)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 5e825e7259a95..21898da1912f5 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -154,12 +154,10 @@ defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_
multiclass MadFmaMixPats<SDPatternOperator fma_like,
Instruction mix_inst,
Instruction mixlo_inst,
- Instruction mixhi_inst,
- bit HasFP32Denormals> {
+ Instruction mixhi_inst> {
// At least one of the operands needs to be an fpextend of an f16
// for this to be worthwhile, so we need three patterns here.
// TODO: Could we use a predicate to inspect src1/2/3 instead?
- let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]) in {
def : GCNPat <
(f32 (fma_like (f32 (VOP3PMadMixModsExt f16:$src0, i32:$src0_mods)),
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_mods)),
@@ -228,13 +226,12 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
DSTCLAMP.NONE,
(i32 (IMPLICIT_DEF)))
>;
- } // End OtherPredicates
// FIXME: Special case handling for maxhi (especially for clamp)
// because dealing with the write to high half of the register is
// difficult.
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
- let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]), True16Predicate = p in {
+ let True16Predicate = p in {
def : GCNPat <
(build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
@@ -260,9 +257,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
VGPR_32:$elt0))
>;
- } // end OtherPredicates
+ } // end True16Predicate
- let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]), True16Predicate = UseRealTrue16Insts in {
+ let True16Predicate = UseRealTrue16Insts in {
def : GCNPat <
(build_vector (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
@@ -297,7 +294,7 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
DSTCLAMP.ENABLE,
(REG_SEQUENCE VGPR_32, $elt0, lo16, (f16 (IMPLICIT_DEF)), hi16)))
>;
- } // end OtherPredicates
+ } // end True16Predicate
}
class MinimumMaximumByMinimum3Maximum3VOP3P<SDPatternOperator node,
@@ -330,9 +327,9 @@ defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F
}
} // End FPDPRounding = 1
}
-} // OtherPredicates = [NoFP32Denormals]
-defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16, 0 /*HasFP32Denormals*/>;
+defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
+} // OtherPredicates = [NoFP32Denormals]
} // End SubtargetPredicate = HasMadMixInsts
@@ -353,7 +350,7 @@ defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F
} // End FPDPRounding = 1
}
-defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16, 1 /*HasPF32Denormals*/>;
+defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}
// Defines patterns that extract signed 4bit from each Idx[0].
``````````
</details>
https://github.com/llvm/llvm-project/pull/127044
More information about the llvm-commits
mailing list