[llvm] [AMDGPU][True16][CodeGen] fp conversion in true/fake16 format (PR #101678)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 14 07:54:19 PDT 2024
================
@@ -1131,25 +1131,42 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
(cvt_f32_f16_inst_e64 SRCMODS.NEG, $src0)
>;
+ // fp_to_fp16 patterns
def : GCNPat <
- (f64 (any_fpextend f16:$src)),
- (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src))
+ (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
+ (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
>;
- // fp_to_fp16 patterns
+ // This is only used on targets without half support
+ // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
def : GCNPat <
- (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
+ (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
(cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
>;
+}
+
+let SubtargetPredicate = NotHasTrue16BitInsts in
+defm : f16_to_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
+
+let SubtargetPredicate = UseFakeTrue16Insts in
+defm : f16_to_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64>;
+
+multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64,
+ Instruction cvt_f32_f16_inst_e64,
+ RegOrImmOperand VSrc> {
+ def : GCNPat <
+ (f64 (any_fpextend f16:$src)),
+ (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src))
+ >;
----------------
arsenm wrote:
That one should also be fixed or removed. f64->f16 is a single rounding step, whereas f64->f32->f16 is two rounding steps, so the result can differ from a direct conversion; you would need fast math to do this. This pattern might just be dead anyway, since we have a custom expansion for it.
https://github.com/llvm/llvm-project/pull/101678
More information about the llvm-commits mailing list