[llvm] [AMDGPU][True16][CodeGen] fp conversion in true/fake16 format (PR #101678)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 14 05:31:04 PDT 2024
================
@@ -1131,25 +1131,42 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
(cvt_f32_f16_inst_e64 SRCMODS.NEG, $src0)
>;
+ // fp_to_fp16 patterns
def : GCNPat <
- (f64 (any_fpextend f16:$src)),
- (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src))
+ (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
+ (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
>;
- // fp_to_fp16 patterns
+ // This is only used on targets without half support
+ // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
def : GCNPat <
- (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
+ (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
(cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
>;
+}
+
+let SubtargetPredicate = NotHasTrue16BitInsts in
+defm : f16_to_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
+
+let SubtargetPredicate = UseFakeTrue16Insts in
+defm : f16_to_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64>;
+
+multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64,
+ Instruction cvt_f32_f16_inst_e64,
+ RegOrImmOperand VSrc> {
+ def : GCNPat <
+ (f64 (any_fpextend f16:$src)),
+ (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src))
+ >;
----------------
arsenm wrote:
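This is wrong: you can't do this conversion in two steps, because rounding through an intermediate f32 can double-round and produce a different result than a single f64->f16 rounding. f64->f16 needs to go through custom lowering.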
https://github.com/llvm/llvm-project/pull/101678
More information about the llvm-commits
mailing list