[llvm] [AMDGPU][True16][MC] true16 for more VOP1 instructions (PR #108412)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 12 13:32:39 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Brox Chen (broxigarchen)
<details>
<summary>Changes</summary>
Support the true16 and fake16 formats for more VOP1 instructions in MC.
---
The patch is 642.53 KiB and has been truncated to 20.00 KiB below. Full version: https://github.com/llvm/llvm-project/pull/108412.diff
27 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+44-54)
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir (+3-3)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1.s (+177-123)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s (+119-119)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s (+56-35)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s (+273-126)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s (+700-238)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s (+98-98)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s (+23-23)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s (+208-208)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1.s (+296-212)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s (+140-98)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s (+63-21)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s (+273-126)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_promote.s (+700-238)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s (+168-105)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt (+256-126)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt (+203-119)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt (+77-21)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt (+196-98)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt (+36-18)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt (+210-105)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt (+257-102)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt (+85-14)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt (+295-105)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt (+197-98)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt (+37-18)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index d656e934dbedfe..ccbe3c502ade13 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -187,21 +187,17 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
let HasClamp = 1;
}
-class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
- VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
-
- let Ins64 = (ins Src0RC64:$src0, Clamp:$clamp, omod:$omod);
- let InsVOP3Base = (ins Src0VOP3DPP:$src0, Clamp:$clamp, omod:$omod);
- let AsmVOP3Base = "$vdst, $src0$clamp$omod";
-
- let HasModifiers = 0;
- let HasClamp = 1;
-}
-
def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
-def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>;
+def VOP1_F16_I16_t16 : VOPProfile_True16 <VOP_F16_I16> {
+ let HasClamp = 1;
+}
+def VOP1_F16_I16_fake16 : VOPProfile_Fake16<VOP_F16_I16> {
+ let HasModifiers = 0;
+ let HasOMod = 1;
+ let HasClamp = 1;
+}
def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
let HasExtVOP3DPP = 0;
@@ -217,10 +213,14 @@ class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
-def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
+def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> {
+ let HasOMod = 1;
+}
+def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16<VOP_I16_F16> {
let HasOMod = 1;
}
+
//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//
@@ -479,24 +479,16 @@ let SubtargetPredicate = isGFX7Plus in {
} // End isReMaterializable = 1
let FPDPRounding = 1 in {
-let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
-defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
-defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
-}
-let OtherPredicates = [HasTrue16BitInsts] in {
-defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>;
-defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>;
-}
+defm V_CVT_F16_U16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_u16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, uint_to_fp>;
+defm V_CVT_F16_I16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_i16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, sint_to_fp>;
+
} // End FPDPRounding = 1
// OMod clears exceptions when set in these two instructions
-let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
-defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
-defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
-}
-let OtherPredicates = [HasTrue16BitInsts] in {
-defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>;
-defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>;
-}
+defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16",
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_uint>;
+defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16",
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;
+
let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
@@ -507,12 +499,8 @@ defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
-let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
-defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
-}
-let OtherPredicates = [HasTrue16BitInsts] in {
-defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>;
-}
+defm V_FREXP_EXP_I16_F16 : VOP1Inst_t16_with_profiles <"v_frexp_exp_i16_f16",
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
@@ -560,14 +548,10 @@ let SubtargetPredicate = isGFX9Plus in {
defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
let mayRaiseFPException = 0 in {
- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
- defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
- defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
- }
- let OtherPredicates = [HasTrue16BitInsts] in {
- defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
- defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
- }
+ defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
+ defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
} // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus
@@ -939,6 +923,14 @@ multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName,
VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
+multiclass VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<bits<9> op, string asmName,
+ string opName = NAME> {
+ defm opName#"_t16" :
+ VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_t16", asmName>;
+ defm opName#"_fake16":
+ VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_fake16", asmName>;
+}
+
multiclass VOP1Only_Real_gfx11_gfx12<bits<9> op> :
VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>;
@@ -979,10 +971,10 @@ defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16"
defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
-defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
-defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
-defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
-defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
+defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
+defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
+defm V_CVT_U16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
+defm V_CVT_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
@@ -994,7 +986,7 @@ defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16"
defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
-defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
+defm V_FREXP_EXP_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
@@ -1005,13 +997,11 @@ defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f1
defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
-defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
-defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
+defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
+defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
-defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
-defm V_CVT_F16_F32_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
-defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
-defm V_CVT_F32_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
+defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
+defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
//===----------------------------------------------------------------------===//
// GFX10.
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
index 68c0715aaafc50..265bdd0cf2f48f 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
@@ -7,12 +7,12 @@ body: |
bb.0:
; GCN-LABEL: name: cvt_hi_f32_f16
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_t16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_F16_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_fake16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_t16_e64_]], implicit $exec
+ ; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_fake16_e64_]], implicit $exec
; GCN-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_fake16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, implicit $mode, implicit $exec
%0:vgpr_32 = IMPLICIT_DEF
- %1:vgpr_32 = V_CVT_F16_U16_t16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %1:vgpr_32 = V_CVT_F16_U16_fake16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
%2:sreg_32 = COPY %1:vgpr_32
%3:sreg_32 = S_CVT_HI_F32_F16 %2:sreg_32, implicit $mode
...
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
index b80bbe161c0de4..379ae3de565aab 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
@@ -448,95 +448,119 @@ v_cvt_f16_f32 v5.h, src_scc
v_cvt_f16_f32 v127.h, 0xaf123456
// GFX11: encoding: [0xff,0x14,0xfe,0x7f,0x56,0x34,0x12,0xaf]
-v_cvt_f16_i16 v5, v1
+v_cvt_f16_i16 v5.l, v1.l
// GFX11: encoding: [0x01,0xa3,0x0a,0x7e]
-v_cvt_f16_i16 v5, v127
+v_cvt_f16_i16 v5.l, v127.l
// GFX11: encoding: [0x7f,0xa3,0x0a,0x7e]
-v_cvt_f16_i16 v5, s1
+v_cvt_f16_i16 v5.l, v1.h
+// GFX11: encoding: [0x81,0xa3,0x0a,0x7e]
+
+v_cvt_f16_i16 v5.l, v127.h
+// GFX11: encoding: [0xff,0xa3,0x0a,0x7e]
+
+v_cvt_f16_i16 v5.l, s1
// GFX11: encoding: [0x01,0xa2,0x0a,0x7e]
-v_cvt_f16_i16 v5, s105
+v_cvt_f16_i16 v5.l, s105
// GFX11: encoding: [0x69,0xa2,0x0a,0x7e]
-v_cvt_f16_i16 v5, vcc_lo
+v_cvt_f16_i16 v5.l, vcc_lo
// GFX11: encoding: [0x6a,0xa2,0x0a,0x7e]
-v_cvt_f16_i16 v5, vcc_hi
+v_cvt_f16_i16 v5.l, vcc_hi
// GFX11: encoding: [0x6b,0xa2,0x0a,0x7e]
-v_cvt_f16_i16 v5, ttmp15
+v_cvt_f16_i16 v5.l, ttmp15
// GFX11: encoding: [0x7b,0xa2,0x0a,0x7e]
-v_cvt_f16_i16 v5, m0
+v_cvt_f16_i16 v5.l, m0
// GFX11: encoding: [0x7d,0xa2,0x0a,0x7e]
-v_cvt_f16_i16 v5, exec_lo
+v_cvt_f16_i16 v5.l, exec_lo
// GFX11: encoding: [0x7e,0xa2,0x0a,0x7e]
-v_cvt_f16_i16 v5, exec_hi
+v_cvt_f16_i16 v5.l, exec_hi
// GFX11: encoding: [0x7f,0xa2,0x0a,0x7e]
-v_cvt_f16_i16 v5, null
+v_cvt_f16_i16 v5.l, null
// GFX11: encoding: [0x7c,0xa2,0x0a,0x7e]
-v_cvt_f16_i16 v5, -1
+v_cvt_f16_i16 v5.l, -1
// GFX11: encoding: [0xc1,0xa2,0x0a,0x7e]
-v_cvt_f16_i16 v5, 0.5
+v_cvt_f16_i16 v127.l, 0.5
+// GFX11: encoding: [0xf0,0xa2,0xfe,0x7e]
+
+v_cvt_f16_i16 v5.l, 0.5
+// GFX11: encoding: [0xf0,0xa2,0x0a,0x7e]
+
+v_cvt_f16_i16 v5.h, src_scc
+// GFX11: encoding: [0xfd,0xa2,0x0a,0x7f]
+
+v_cvt_f16_i16 v5.l, 0.5
// GFX11: encoding: [0xf0,0xa2,0x0a,0x7e]
-v_cvt_f16_i16 v5, src_scc
-// GFX11: encoding: [0xfd,0xa2,0x0a,0x7e]
+v_cvt_f16_i16 v5.h, src_scc
+// GFX11: encoding: [0xfd,0xa2,0x0a,0x7f]
-v_cvt_f16_i16 v127, 0xfe0b
-// GFX11: encoding: [0xff,0xa2,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
+v_cvt_f16_i16 v127.h, 0xfe0b
+// GFX11: encoding: [0xff,0xa2,0xfe,0x7f,0x0b,0xfe,0x00,0x00]
-v_cvt_f16_u16 v5, v1
+v_cvt_f16_u16 v5.l, v1.l
// GFX11: encoding: [0x01,0xa1,0x0a,0x7e]
-v_cvt_f16_u16 v5, v127
+v_cvt_f16_u16 v5.l, v127.l
// GFX11: encoding: [0x7f,0xa1,0x0a,0x7e]
-v_cvt_f16_u16 v5, s1
+v_cvt_f16_u16 v5.l, v1.h
+// GFX11: encoding: [0x81,0xa1,0x0a,0x7e]
+
+v_cvt_f16_u16 v5.l, v127.h
+// GFX11: encoding: [0xff,0xa1,0x0a,0x7e]
+
+v_cvt_f16_u16 v5.l, s1
// GFX11: encoding: [0x01,0xa0,0x0a,0x7e]
-v_cvt_f16_u16 v5, s105
+v_cvt_f16_u16 v5.l, s105
// GFX11: encoding: [0x69,0xa0,0x0a,0x7e]
-v_cvt_f16_u16 v5, vcc_lo
+v_cvt_f16_u16 v5.l, vcc_lo
// GFX11: encoding: [0x6a,0xa0,0x0a,0x7e]
-v_cvt_f16_u16 v5, vcc_hi
+v_cvt_f16_u16 v5.l, vcc_hi
// GFX11: encoding: [0x6b,0xa0,0x0a,0x7e]
-v_cvt_f16_u16 v5, ttmp15
+v_cvt_f16_u16 v5.l, ttmp15
// GFX11: encoding: [0x7b,0xa0,0x0a,0x7e]
-v_cvt_f16_u16 v5, m0
+v_cvt_f16_u16 v5.l, m0
// GFX11: encoding: [0x7d,0xa0,0x0a,0x7e]
-v_cvt_f16_u16 v5, exec_lo
+v_cvt_f16_u16 v5.l, exec_lo
// GFX11: encoding: [0x7e,0xa0,0x0a,0x7e]
-v_cvt_f16_u16 v5, exec_hi
+v_cvt_f16_u16 v5.l, exec_hi
// GFX11: encoding: [0x7f,0xa0,0x0a,0x7e]
-v_cvt_f16_u16 v5, null
+v_cvt_f16_u16 v5.l, null
// GFX11: encoding: [0x7c,0xa0,0x0a,0x7e]
-v_cvt_f16_u16 v5, -1
+v_cvt_f16_u16 v5.l, -1
// GFX11: encoding: [0xc1,0xa0,0x0a,0x7e]
+v_cvt_f16_u16 v127.l, 0.5
+// GFX11: encoding: [0xf0,0xa0,0xfe,0x7e]
+
v_cvt_f16_u16 v5, 0.5
// GFX11: encoding: [0xf0,0xa0,0x0a,0x7e]
-v_cvt_f16_u16 v5, src_scc
-// GFX11: encoding: [0xfd,0xa0,0x0a,0x7e]
+v_cvt_f16_u16 v5.h, src_scc
+// GFX11: encoding: [0xfd,0xa0,0x0a,0x7f]
-v_cvt_f16_u16 v127, 0xfe0b
-// GFX11: encoding: [0xff,0xa0,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
+v_cvt_f16_u16 v127.h, 0xfe0b
+// GFX11: encoding: [0xff,0xa0,0xfe,0x7f,0x0b,0xfe,0x00,0x00]
v_cvt_f32_f16 v5, v1.l
// GFX11: encoding: [0x01,0x17,0x0a,0x7e]
@@ -1120,50 +1144,56 @@ v_cvt_flr_i32_f32 v5, src_scc
v_cvt_flr_i32_f32 v255, 0xaf123456
// GFX11: encoding: [0xff,0x1a,0xfe,0x7f,0x56,0x34,0x12,0xaf]
-v_cvt_i16_f16 v5, v1
+v_cvt_i16_f16 v5.l, v1.l
// GFX11: encoding: [0x01,0xa7,0x0a,0x7e]
-v_cvt_i16_f16 v5, v127
+v_cvt_i16_f16 v5.l, v127.l
// GFX11: encoding: [0x7f,0xa7,0x0a,0x7e]
-v_cvt_i16_f16 v5, s1
+v_cvt_i16_f16 v5.l, v1.h
+// GFX11: encoding: [0x81,0xa7,0x0a,0x7e]
+
+v_cvt_i16_f16 v5.l, v127.h
+// GFX11: encoding: [0xff,0xa7,0x0a,0x7e]
+
+v_cvt_i16_f16 v5.l, s1
// GFX11: encoding: [0x01,0xa6,0x0a,0x7e]
-v_cvt_i16_f16 v5, s105
+v_cvt_i16_f16 v5.l, s105
// GFX11: encoding: [0x69,0xa6,0x0a,0x7e]
-v_cvt_i16_f16 v5, vcc_lo
+v_cvt_i16_f16 v5.l, vcc_lo
// GFX11: encoding: [0x6a,0xa6,0x0a,0x7e]
-v_cvt_i16_f16 v5, vcc_hi
+v_cvt_i16_f16 v5.l, vcc_hi
// GFX11: encoding: [0x6b,0xa6,0x0a,0x7e]
-v_cvt_i16_f16 v5, ttmp15
+v_cvt_i16_f16 v5.l, ttmp15
// GFX11: encoding: [0x7b,0xa6,0x0a,0x7e]
-v_cvt_i16_f16 v5, m0
+v_cvt_i16_f16 v5.l, m0
// GFX11: encoding: [0x7d,0xa6,0x0a,0x7e]
-v_cvt_i16_f16 v5, exec_lo
+v_cvt_i16_f16 v5.l, exec_lo
// GFX11: encoding: [0x7e,0xa6,0x0a,0x7e]
-v_cvt_i16_f16 v5, exec_hi
+v_cvt_i16_f16 v5.l, exec_hi
// GFX11: encoding: [0x7f,0xa6,0x0a,0x7e]
-v_cvt_i16_f16 v5, null
+v_cvt_i16_f16 v5.l, null
// GFX11: encoding: [0x7c,0xa6,0x0a,0x7e]
-v_cvt_i16_f16 v5, -1
+v_cvt_i16_f16 v5.l, -1
// GFX11: encoding: [0xc1,0xa6,0x0a,0x7e]
-v_cvt_i16_f16 v5, 0.5
-// GFX11: encoding: [0xf0,0xa6,0x0a,0x7e]
+v_cvt_i16_f16 v127.l, 0.5
+// GFX11: encoding: [0xf0,0xa6,0xfe,0x7e]
-v_cvt_i16_f16 v5, src_scc
-// GFX11: encoding: [0xfd,0xa6,0x0a,0x7e]
+v_cvt_i16_f16 v5.h, src_scc
+// GFX11: encoding: [0xfd,0xa6,0x0a,0x7f]
-v_cvt_i16_f16 v127, 0xfe0b
-// GFX11: encoding: [0xff,0xa6,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
+v_cvt_i16_f16 v127.h, 0xfe0b
+// GFX11: encoding: [0xff,0xa6,0xfe,0x7f,0x0b,0xfe,0x00,0x00]
v_cvt_i32_f32 v5, v1
// GFX11: encoding: [0x01,0x11,0x0a,0x7e]
@@ -1336,95 +1366,107 @@ v_cvt_nearest_i32_f32 v5, src_scc
v_cvt_nearest_i32_f32 v255, 0xaf123456
// GFX11: encoding: [0xff,0x18,0xfe,0x7f,0x56,0x34,0x12,0xaf]
-v_cvt_norm_i16_f16 v5, v1
+v_cvt_norm_i16_f16 v5.l, v1.l
// GFX11: encoding: [0x01,0xc7,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, v127
+v_cvt_norm_i16_f16 v5.l, v127.l
// GFX11: encoding: [0x7f,0xc7,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, s1
+v_cvt_norm_i16_f16 v5.l, v1.h
+// GFX11: encoding: [0x81,0xc7,0x0a,0x7e]
+
+v_cvt_norm_i16_f16 v5.l, v127.h
+// GFX11: encoding: [0xff,0xc7,0x0a,0x7e]
+
+v_cvt_norm_i16_f16 v5.l, s1
// GFX11: encoding: [0x01,0xc6,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, s105
+v_cvt_norm_i16_f16 v5.l, s105
// GFX11: encoding: [0x69,0xc6,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, vcc_lo
+v_cvt_norm_i16_f16 v5.l, vcc_lo
// GFX11: encoding: [0x6a,0xc6,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, vcc_hi
+v_cvt_norm_i16_f16 v5.l, vcc_hi
// GFX11: encoding: [0x6b,0xc6,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, ttmp15
+v_cvt_norm_i16_f16 v5.l, ttmp15
// GFX11: encoding: [0x7b,0xc6,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, m0
+v_cvt_norm_i16_f16 v5.l, m0
// GFX11: encoding: [0x7d,0xc6,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, exec_lo
+v_cvt_norm_i16_f16 v5.l, exec_lo
// GFX11: encoding: [0x7e,0xc6,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, exec_hi
+v_cvt_norm_i16_f16 v5.l, exec_hi
// GFX11: encoding: [0x7f,0xc6,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, null
+v_cvt_norm_i16_f16 v5.l, null
// GFX11: encoding: [0x7c,0xc6,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, -1
+v_cvt_norm_i16_f16 v5.l, -1
// GFX11: encoding: [0xc1,0xc6,0x0a,0x7e]
-v_cvt_norm_i16_f16 v5, 0.5
-// GFX11: encoding: [0xf0,0xc6,0x0a,0x7e]
+v_cvt_norm_i16_f16 v127.l, 0.5
+// GFX11: encoding: [0xf0,0xc6,0xfe,0x7e]
-v_cvt_norm_i16_f16 v5, src_scc
-// GFX11: encoding: [0xfd,0xc6,0x0a,0x7e]
+v_cvt_norm_i16_f16 v5.h, src_scc
+// GFX11: encoding: [0xfd,0xc6,0x0a,0x7f]
-v_cvt_norm_i16_f16 v127, 0xfe0b
-// GFX11: encoding: [0xff,0xc6,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
+v_cvt_norm_i16_f16 v127.h, 0xfe0b
+// GFX11: encoding: [0xff,0xc6,0xfe,0x7f,0x0b,0xfe,0x00,0x00]
-v_cvt_norm_u16_f16 v5, v1
+v_cvt_norm_u16_f16 v5.l, v1.l
// GFX11: encoding: [0x01,0xc9,0x0a,0x7e]
-v_cvt_norm_u16_f16 v5, v127
+v_cvt_norm_u16_f16 v5.l, v127.l
// GFX11: encoding: [0x7f,0xc9,0x0a,0x7e]
-v_cvt_norm_u16_f16 v5, s1
+v_cvt_norm_u16_f16 v5.l, v1.h
+// GFX11: encoding: [0x81,0xc9,0x0a,0x7e]
+
+v_cvt_norm_u16_f16 v5.l, v127.h
+// GFX11: encoding: [0xff,0xc9,0x0a,0x7e]
+
+v_cvt_norm_u16_f16 v5.l, s1
// GFX11: encoding: [0x01,0xc8,0x0a,0x7e]
-v_cvt_norm_u16_f16 v5, s105
+v_cvt_norm_u16_f16 v5.l, s105
// GFX11: encoding...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/108412
More information about the llvm-commits mailing list