[llvm] [AMDGPU][True16][MC][CodeGen] true16 mode for v_cvt_pk_bf8/fp8_f32 (PR #141881)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Thu May 29 16:58:08 PDT 2025
================
@@ -570,6 +570,36 @@ def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> {
let HasExtVOP3DPP = 1;
}
+def VOP3_CVT_PK_F8_F32_Profile_fake16 : VOP3_Profile_Fake16<VOP_I16_F32_F32, VOP3_OPSEL> {
+ defvar Tail = (ins VGPR_32:$vdst_in, op_sel0:$op_sel);
+ let InsVOP3OpSel = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
+ 0, HasModifiers, HasSrc2Mods,
+ HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
+ Tail);
+ let InsVOP3Base = !con(getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
+ Src2VOP3DPP, NumSrcArgs, 0, HasModifiers,
+ HasSrc2Mods, HasOMod, Src0ModVOP3DPP, Src1ModVOP3DPP,
+ Src2ModVOP3DPP, false>.ret,
+ Tail);
+ let HasClamp = 0;
+ let HasExtVOP3DPP = 1;
+}
+
+def VOP3_CVT_PK_F8_F32_Profile_t16 : VOP3_Profile_True16<VOP_I16_F32_F32, VOP3_OPSEL> {
+ defvar Tail = (ins VGPR_16:$vdst_in, op_sel0:$op_sel);
----------------
rampitec wrote:
That matches my understanding. A user will expect a packed result.
https://github.com/llvm/llvm-project/pull/141881
More information about the llvm-commits
mailing list